aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2012-08-15 19:34:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2012-08-15 19:34:23 +0000
commit58b69754af0cbff56b1cfce9be9392e4451f6628 (patch)
treeeacfc83d988e4b9d11114387ae7dc41243f2a363 /test/CodeGen
parent0378662f5bd3dbe8305a485b0282bceb8b52f465 (diff)
downloadsrc-58b69754af0cbff56b1cfce9be9392e4451f6628.tar.gz
src-58b69754af0cbff56b1cfce9be9392e4451f6628.zip
Vendor import of llvm trunk r161861:vendor/llvm/llvm-trunk-r161861
Notes
Notes: svn path=/vendor/llvm/dist/; revision=239310 svn path=/vendor/llvm/llvm-trunk-r161861/; revision=239311; tag=vendor/llvm/llvm-trunk-r161861
Diffstat (limited to 'test/CodeGen')
-rw-r--r--test/CodeGen/ARM/2007-03-13-InstrSched.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-PEIBug.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll2
-rw-r--r--test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-06-AsmModifier.ll2
-rw-r--r--test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll2
-rw-r--r--test/CodeGen/ARM/2011-12-14-machine-sink.ll2
-rw-r--r--test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll8
-rw-r--r--test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll71
-rw-r--r--test/CodeGen/ARM/2012-05-29-TailDupBug.ll140
-rw-r--r--test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll41
-rw-r--r--test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll174
-rw-r--r--test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll12
-rw-r--r--test/CodeGen/ARM/2012-08-09-neon-extload.ll102
-rw-r--r--test/CodeGen/ARM/2012-08-13-bfi.ll17
-rw-r--r--test/CodeGen/ARM/addrmode.ll2
-rw-r--r--test/CodeGen/ARM/aliases.ll2
-rw-r--r--test/CodeGen/ARM/arm-modifier.ll9
-rw-r--r--test/CodeGen/ARM/bicZext.ll19
-rw-r--r--test/CodeGen/ARM/call_nolink.ll2
-rw-r--r--test/CodeGen/ARM/cmn.ll22
-rw-r--r--test/CodeGen/ARM/coalesce-subregs.ll68
-rw-r--r--test/CodeGen/ARM/crash-greedy.ll46
-rw-r--r--test/CodeGen/ARM/cse-libcalls.ll2
-rw-r--r--test/CodeGen/ARM/data-in-code-annotations.ll42
-rw-r--r--test/CodeGen/ARM/debug-info-branch-folding.ll13
-rw-r--r--test/CodeGen/ARM/divmod.ll16
-rw-r--r--test/CodeGen/ARM/fabss.ll10
-rw-r--r--test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll17
-rw-r--r--test/CodeGen/ARM/fast-isel-call.ll95
-rw-r--r--test/CodeGen/ARM/fast-isel-frameaddr.ll100
-rw-r--r--test/CodeGen/ARM/fast-isel-intrinsic.ll32
-rw-r--r--test/CodeGen/ARM/fast-isel-shifter.ll50
-rw-r--r--test/CodeGen/ARM/fast-isel.ll12
-rw-r--r--test/CodeGen/ARM/fcopysign.ll6
-rw-r--r--test/CodeGen/ARM/floorf.ll29
-rw-r--r--test/CodeGen/ARM/fmuls.ll9
-rw-r--r--test/CodeGen/ARM/fparith.ll6
-rw-r--r--test/CodeGen/ARM/fusedMAC.ll47
-rw-r--r--test/CodeGen/ARM/iabs.ll20
-rw-r--r--test/CodeGen/ARM/ldrd.ll5
-rw-r--r--test/CodeGen/ARM/lsr-scale-addr-mode.ll2
-rw-r--r--test/CodeGen/ARM/movt-movw-global.ll8
-rw-r--r--test/CodeGen/ARM/neon_div.ll6
-rw-r--r--test/CodeGen/ARM/opt-shuff-tstore.ll2
-rw-r--r--test/CodeGen/ARM/pr13249.ll27
-rw-r--r--test/CodeGen/ARM/select.ll26
-rw-r--r--test/CodeGen/ARM/smml.ll13
-rw-r--r--test/CodeGen/ARM/str_pre-2.ll9
-rw-r--r--test/CodeGen/ARM/str_pre.ll2
-rw-r--r--test/CodeGen/ARM/struct_byval.ll46
-rw-r--r--test/CodeGen/ARM/sub-cmp-peephole.ll65
-rw-r--r--test/CodeGen/ARM/sub.ll12
-rw-r--r--test/CodeGen/ARM/thread_pointer.ll2
-rw-r--r--test/CodeGen/ARM/thumb2-it-block.ll4
-rw-r--r--test/CodeGen/ARM/tls-models.ll117
-rw-r--r--test/CodeGen/ARM/tls1.ll6
-rw-r--r--test/CodeGen/ARM/tls3.ll2
-rw-r--r--test/CodeGen/ARM/twoaddrinstr.ll21
-rw-r--r--test/CodeGen/ARM/unsafe-fsub.ll18
-rw-r--r--test/CodeGen/ARM/vcnt.ll49
-rw-r--r--test/CodeGen/ARM/vector-extend-narrow.ll8
-rw-r--r--test/CodeGen/ARM/vfp.ll8
-rw-r--r--test/CodeGen/ARM/vlddup.ll32
-rw-r--r--test/CodeGen/ARM/vmul.ll74
-rw-r--r--test/CodeGen/ARM/vst3.ll2
-rw-r--r--test/CodeGen/CPP/2007-06-16-Funcname.ll1
-rw-r--r--test/CodeGen/CellSPU/fcmp32.ll6
-rw-r--r--test/CodeGen/CellSPU/fneg-fabs.ll4
-rw-r--r--test/CodeGen/CellSPU/icmp16.ll246
-rw-r--r--test/CodeGen/CellSPU/icmp32.ll247
-rw-r--r--test/CodeGen/CellSPU/icmp8.ll180
-rw-r--r--test/CodeGen/CellSPU/shift_ops.ll32
-rw-r--r--test/CodeGen/CellSPU/stores.ll6
-rw-r--r--test/CodeGen/CellSPU/trunc.ll28
-rw-r--r--test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll2
-rw-r--r--test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll19
-rw-r--r--test/CodeGen/Generic/2012-06-08-APIntCrash.ll9
-rw-r--r--test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll8
-rw-r--r--test/CodeGen/Generic/asm-large-immediate.ll6
-rw-r--r--test/CodeGen/Generic/donothing.ll31
-rw-r--r--test/CodeGen/Generic/edge-bundles-blockIDs.ll2
-rw-r--r--test/CodeGen/Generic/print-after.ll6
-rw-r--r--test/CodeGen/Generic/print-machineinstrs.ll14
-rw-r--r--test/CodeGen/Generic/stop-after.ll10
-rw-r--r--test/CodeGen/Generic/undef-phi.ll26
-rw-r--r--test/CodeGen/Hexagon/args.ll3
-rw-r--r--test/CodeGen/Hexagon/combine.ll3
-rw-r--r--test/CodeGen/Hexagon/convertdptoint.ll26
-rw-r--r--test/CodeGen/Hexagon/convertdptoll.ll27
-rw-r--r--test/CodeGen/Hexagon/convertsptoint.ll26
-rw-r--r--test/CodeGen/Hexagon/convertsptoll.ll27
-rw-r--r--test/CodeGen/Hexagon/dadd.ll19
-rw-r--r--test/CodeGen/Hexagon/dmul.ll18
-rw-r--r--test/CodeGen/Hexagon/double.ll3
-rw-r--r--test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll26
-rw-r--r--test/CodeGen/Hexagon/dsub.ll18
-rw-r--r--test/CodeGen/Hexagon/dualstore.ll17
-rw-r--r--test/CodeGen/Hexagon/fadd.ll18
-rw-r--r--test/CodeGen/Hexagon/fcmp.ll37
-rw-r--r--test/CodeGen/Hexagon/float.ll3
-rw-r--r--test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll22
-rw-r--r--test/CodeGen/Hexagon/fmul.ll19
-rw-r--r--test/CodeGen/Hexagon/frame.ll3
-rw-r--r--test/CodeGen/Hexagon/fsub.ll18
-rw-r--r--test/CodeGen/Hexagon/fusedandshift.ll16
-rw-r--r--test/CodeGen/Hexagon/macint.ll14
-rw-r--r--test/CodeGen/Hexagon/mpy.ll3
-rw-r--r--test/CodeGen/Hexagon/newvaluejump.ll33
-rw-r--r--test/CodeGen/Hexagon/newvaluejump2.ll30
-rw-r--r--test/CodeGen/Hexagon/newvaluestore.ll22
-rw-r--r--test/CodeGen/Hexagon/opt-fabs.ll15
-rw-r--r--test/CodeGen/Hexagon/opt-fneg.ll26
-rw-r--r--test/CodeGen/Hexagon/simpletailcall.ll14
-rw-r--r--test/CodeGen/Hexagon/static.ll9
-rw-r--r--test/CodeGen/Hexagon/struct_args.ll6
-rw-r--r--test/CodeGen/Hexagon/struct_args_large.ll7
-rw-r--r--test/CodeGen/Hexagon/vaddh.ll3
-rw-r--r--test/CodeGen/MSP430/2009-12-21-FrameAddr.ll6
-rw-r--r--test/CodeGen/MSP430/Inst8rr.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-23-fpcmp.ll4
-rw-r--r--test/CodeGen/Mips/2008-07-29-icmp.ll2
-rw-r--r--test/CodeGen/Mips/2010-07-20-Switch.ll30
-rw-r--r--test/CodeGen/Mips/alloca.ll21
-rw-r--r--test/CodeGen/Mips/analyzebranch.ll6
-rw-r--r--test/CodeGen/Mips/and1.ll17
-rw-r--r--test/CodeGen/Mips/asm-large-immediate.ll10
-rw-r--r--test/CodeGen/Mips/atomic.ll34
-rwxr-xr-xtest/CodeGen/Mips/cmov.ll38
-rw-r--r--test/CodeGen/Mips/cprestore.ll4
-rw-r--r--test/CodeGen/Mips/eh.ll2
-rw-r--r--test/CodeGen/Mips/fabs.ll10
-rw-r--r--test/CodeGen/Mips/fastcc.ll253
-rw-r--r--test/CodeGen/Mips/fp-indexed-ls.ll12
-rw-r--r--test/CodeGen/Mips/fp-spill-reload.ll39
-rw-r--r--test/CodeGen/Mips/global-pointer-reg.ll4
-rw-r--r--test/CodeGen/Mips/gprestore.ll4
-rw-r--r--test/CodeGen/Mips/helloworld.ll34
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll15
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll16
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll16
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll16
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll17
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll16
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll16
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll44
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll20
-rw-r--r--test/CodeGen/Mips/inlineasm-operand-code.ll153
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint.ll55
-rw-r--r--test/CodeGen/Mips/inlineasmmemop.ll2
-rw-r--r--test/CodeGen/Mips/internalfunc.ll8
-rw-r--r--test/CodeGen/Mips/largeimmprinting.ll5
-rw-r--r--test/CodeGen/Mips/lb1.ll18
-rw-r--r--test/CodeGen/Mips/lbu1.ll19
-rw-r--r--test/CodeGen/Mips/lh1.ll18
-rw-r--r--test/CodeGen/Mips/lhu1.ll19
-rw-r--r--test/CodeGen/Mips/load-store-left-right.ll29
-rw-r--r--test/CodeGen/Mips/longbranch.ll25
-rw-r--r--test/CodeGen/Mips/machineverifier.ll21
-rw-r--r--test/CodeGen/Mips/memcpy.ll19
-rw-r--r--test/CodeGen/Mips/mips64-fp-indexed-ls.ll12
-rw-r--r--test/CodeGen/Mips/mips64load-store-left-right.ll73
-rw-r--r--test/CodeGen/Mips/neg1.ll15
-rw-r--r--test/CodeGen/Mips/not1.ll16
-rw-r--r--test/CodeGen/Mips/null.ll13
-rw-r--r--test/CodeGen/Mips/o32_cc_byval.ll60
-rw-r--r--test/CodeGen/Mips/o32_cc_vararg.ll1
-rw-r--r--test/CodeGen/Mips/or1.ll17
-rw-r--r--test/CodeGen/Mips/ra-allocatable.ll288
-rw-r--r--test/CodeGen/Mips/rdhwr-directives.ll15
-rw-r--r--test/CodeGen/Mips/return_address.ll23
-rw-r--r--test/CodeGen/Mips/sb1.ll20
-rw-r--r--test/CodeGen/Mips/selectcc.ll27
-rw-r--r--test/CodeGen/Mips/sh1.ll20
-rw-r--r--test/CodeGen/Mips/shift-parts.ll29
-rw-r--r--test/CodeGen/Mips/sitofp-selectcc-opt.ll22
-rw-r--r--test/CodeGen/Mips/sll1.ll19
-rw-r--r--test/CodeGen/Mips/sll2.ll19
-rw-r--r--test/CodeGen/Mips/sra1.ll15
-rw-r--r--test/CodeGen/Mips/sra2.ll17
-rw-r--r--test/CodeGen/Mips/srl1.ll18
-rw-r--r--test/CodeGen/Mips/srl2.ll20
-rw-r--r--test/CodeGen/Mips/stacksize.ll9
-rw-r--r--test/CodeGen/Mips/sub1.ll15
-rw-r--r--test/CodeGen/Mips/sub2.ll17
-rw-r--r--test/CodeGen/Mips/swzero.ll3
-rw-r--r--test/CodeGen/Mips/tls-alias.ll10
-rw-r--r--test/CodeGen/Mips/tls-models.ll113
-rw-r--r--test/CodeGen/Mips/tls.ll18
-rw-r--r--test/CodeGen/Mips/unalignedload.ll30
-rw-r--r--test/CodeGen/Mips/xor1.ll17
-rw-r--r--test/CodeGen/Mips/zeroreg.ll6
-rw-r--r--test/CodeGen/NVPTX/annotations.ll55
-rw-r--r--test/CodeGen/NVPTX/arithmetic-fp-sm10.ll72
-rw-r--r--test/CodeGen/NVPTX/arithmetic-fp-sm20.ll72
-rw-r--r--test/CodeGen/NVPTX/arithmetic-int.ll295
-rw-r--r--test/CodeGen/NVPTX/calling-conv.ll32
-rw-r--r--test/CodeGen/NVPTX/compare-int.ll389
-rw-r--r--test/CodeGen/NVPTX/convert-fp.ll146
-rw-r--r--test/CodeGen/NVPTX/convert-int-sm10.ll55
-rw-r--r--test/CodeGen/NVPTX/convert-int-sm20.ll64
-rw-r--r--test/CodeGen/NVPTX/fma-disable.ll24
-rw-r--r--test/CodeGen/NVPTX/fma.ll17
-rw-r--r--test/CodeGen/NVPTX/intrinsic-old.ll (renamed from test/CodeGen/PTX/intrinsic.ll)71
-rw-r--r--test/CodeGen/NVPTX/intrinsics.ll21
-rw-r--r--test/CodeGen/NVPTX/ld-addrspace.ll173
-rw-r--r--test/CodeGen/NVPTX/ld-generic.ll63
-rw-r--r--test/CodeGen/NVPTX/lit.local.cfg (renamed from test/CodeGen/PTX/lit.local.cfg)3
-rw-r--r--test/CodeGen/NVPTX/simple-call.ll26
-rw-r--r--test/CodeGen/NVPTX/st-addrspace.ll179
-rw-r--r--test/CodeGen/NVPTX/st-generic.ll69
-rw-r--r--test/CodeGen/PTX/20110926-sitofp.ll24
-rw-r--r--test/CodeGen/PTX/add.ll71
-rw-r--r--test/CodeGen/PTX/aggregates.ll24
-rw-r--r--test/CodeGen/PTX/bitwise.ll24
-rw-r--r--test/CodeGen/PTX/bra.ll24
-rw-r--r--test/CodeGen/PTX/cvt.ll290
-rw-r--r--test/CodeGen/PTX/exit.ll14
-rw-r--r--test/CodeGen/PTX/fdiv-sm10.ll15
-rw-r--r--test/CodeGen/PTX/fdiv-sm13.ll15
-rw-r--r--test/CodeGen/PTX/fneg.ll15
-rw-r--r--test/CodeGen/PTX/ld.ll382
-rw-r--r--test/CodeGen/PTX/llvm-intrinsic.ll56
-rw-r--r--test/CodeGen/PTX/mad-disabling.ll24
-rw-r--r--test/CodeGen/PTX/mad.ll17
-rw-r--r--test/CodeGen/PTX/mov.ll62
-rw-r--r--test/CodeGen/PTX/mul.ll39
-rw-r--r--test/CodeGen/PTX/options.ll13
-rw-r--r--test/CodeGen/PTX/parameter-order.ll8
-rw-r--r--test/CodeGen/PTX/printf.ll25
-rw-r--r--test/CodeGen/PTX/ret.ll7
-rw-r--r--test/CodeGen/PTX/selp.ll25
-rw-r--r--test/CodeGen/PTX/setp.ll206
-rw-r--r--test/CodeGen/PTX/shl.ll22
-rw-r--r--test/CodeGen/PTX/shr.ll43
-rw-r--r--test/CodeGen/PTX/simple-call.ll27
-rw-r--r--test/CodeGen/PTX/st.ll337
-rw-r--r--test/CodeGen/PTX/stack-object.ll19
-rw-r--r--test/CodeGen/PTX/sub.ll71
-rw-r--r--test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-04-05-splat-ish.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll10
-rw-r--r--test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll2
-rw-r--r--test/CodeGen/PowerPC/2010-03-09-indirect-call.ll6
-rw-r--r--test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll4
-rw-r--r--test/CodeGen/PowerPC/Frames-leaf.ll32
-rw-r--r--test/CodeGen/PowerPC/Frames-small.ll32
-rw-r--r--test/CodeGen/PowerPC/LargeAbsoluteAddr.ll6
-rw-r--r--test/CodeGen/PowerPC/a2-fp-basic.ll2
-rw-r--r--test/CodeGen/PowerPC/and-imm.ll2
-rw-r--r--test/CodeGen/PowerPC/big-endian-actual-args.ll4
-rw-r--r--test/CodeGen/PowerPC/big-endian-call-result.ll4
-rw-r--r--test/CodeGen/PowerPC/branch-opt.ll2
-rw-r--r--test/CodeGen/PowerPC/calls.ll6
-rw-r--r--test/CodeGen/PowerPC/coalesce-ext.ll17
-rw-r--r--test/CodeGen/PowerPC/compare-simm.ll2
-rw-r--r--test/CodeGen/PowerPC/constants.ll2
-rw-r--r--test/CodeGen/PowerPC/ctrloop-reg.ll87
-rw-r--r--test/CodeGen/PowerPC/ctrloop-s000.ll156
-rw-r--r--test/CodeGen/PowerPC/ctrloop-sums.ll134
-rw-r--r--test/CodeGen/PowerPC/ctrloops.ll79
-rw-r--r--test/CodeGen/PowerPC/darwin-labels.ll2
-rw-r--r--test/CodeGen/PowerPC/fabs.ll4
-rw-r--r--test/CodeGen/PowerPC/fma.ll4
-rw-r--r--test/CodeGen/PowerPC/fnabs.ll2
-rw-r--r--test/CodeGen/PowerPC/fsqrt.ll8
-rw-r--r--test/CodeGen/PowerPC/iabs.ll4
-rw-r--r--test/CodeGen/PowerPC/isel.ll23
-rw-r--r--test/CodeGen/PowerPC/ispositive.ll2
-rw-r--r--test/CodeGen/PowerPC/lbzux.ll49
-rw-r--r--test/CodeGen/PowerPC/long-compare.ll4
-rw-r--r--test/CodeGen/PowerPC/lsr-postinc-pos.ll2
-rw-r--r--test/CodeGen/PowerPC/mem_update.ll4
-rw-r--r--test/CodeGen/PowerPC/no-dead-strip.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc440-fp-basic.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-cyclecounter.ll15
-rw-r--r--test/CodeGen/PowerPC/retaddr.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwimi-commute.ll2
-rw-r--r--test/CodeGen/PowerPC/rlwimi3.ll4
-rw-r--r--test/CodeGen/PowerPC/seteq-0.ll2
-rw-r--r--test/CodeGen/PowerPC/small-arguments.ll2
-rw-r--r--test/CodeGen/PowerPC/stack-protector.ll4
-rw-r--r--test/CodeGen/PowerPC/stwu-gta.ll22
-rw-r--r--test/CodeGen/PowerPC/stwu8.ll28
-rw-r--r--test/CodeGen/PowerPC/stwux.ll47
-rw-r--r--test/CodeGen/PowerPC/tls.ll16
-rw-r--r--test/CodeGen/PowerPC/trampoline.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_buildvector_loadstore.ll2
-rw-r--r--test/CodeGen/SPARC/2012-05-01-LowerArguments.ll13
-rw-r--r--test/CodeGen/SPARC/private.ll12
-rw-r--r--test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll12
-rw-r--r--test/CodeGen/Thumb/asmprinter-bug.ll2
-rw-r--r--test/CodeGen/Thumb/frame_thumb.ll4
-rw-r--r--test/CodeGen/Thumb/iabs.ll4
-rw-r--r--test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll2
-rw-r--r--test/CodeGen/Thumb2/constant-islands.ll4
-rw-r--r--test/CodeGen/Thumb2/inflate-regs.ll49
-rw-r--r--test/CodeGen/Thumb2/inlineasm.ll9
-rw-r--r--test/CodeGen/Thumb2/large-call.ll9
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmn.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp2.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-jtb.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_post.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_pre.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-rev16.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ror.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbb.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq2.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst2.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxt_rot.ll21
-rw-r--r--test/CodeGen/Thumb2/tls1.ll6
-rw-r--r--test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll2
-rw-r--r--test/CodeGen/X86/2003-11-03-GlobalBool.ll2
-rw-r--r--test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll6
-rw-r--r--test/CodeGen/X86/2004-03-30-Select-Max.ll3
-rw-r--r--test/CodeGen/X86/2006-03-01-InstrSchedBug.ll2
-rw-r--r--test/CodeGen/X86/2006-03-02-InstrSchedBug.ll2
-rw-r--r--test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll4
-rw-r--r--test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll4
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched1.ll2
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched2.ll2
-rw-r--r--test/CodeGen/X86/2006-05-08-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll4
-rw-r--r--test/CodeGen/X86/2006-07-31-SingleRegClass.ll4
-rw-r--r--test/CodeGen/X86/2006-08-21-ExtraMovInst.ll2
-rw-r--r--test/CodeGen/X86/2006-11-12-CSRetCC.ll4
-rw-r--r--test/CodeGen/X86/2006-11-17-IllegalMove.ll4
-rw-r--r--test/CodeGen/X86/2007-01-13-StackPtrIndex.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll2
-rw-r--r--test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll2
-rw-r--r--test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll2
-rw-r--r--test/CodeGen/X86/2007-05-07-InvokeSRet.ll2
-rw-r--r--test/CodeGen/X86/2007-08-10-SignExtSubreg.ll2
-rw-r--r--test/CodeGen/X86/2007-09-05-InvalidAsm.ll3
-rw-r--r--test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll2
-rw-r--r--test/CodeGen/X86/2007-12-18-LoadCSEBug.ll2
-rw-r--r--test/CodeGen/X86/2008-01-08-SchedulerCrash.ll6
-rw-r--r--test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll2
-rw-r--r--test/CodeGen/X86/2008-02-18-TailMergingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll4
-rw-r--r--test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll2
-rw-r--r--test/CodeGen/X86/2008-04-16-ReMatBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-17-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-28-CoalescerBug.ll6
-rw-r--r--test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-08-06-CmpStride.ll2
-rw-r--r--test/CodeGen/X86/2008-08-31-EH_RETURN32.ll27
-rw-r--r--test/CodeGen/X86/2008-09-17-inline-asm-1.ll2
-rw-r--r--test/CodeGen/X86/2008-09-18-inline-asm-2.ll2
-rw-r--r--test/CodeGen/X86/2008-10-24-FlippedCompare.ll2
-rw-r--r--test/CodeGen/X86/2008-10-27-CoalescerBug.ll5
-rw-r--r--test/CodeGen/X86/2008-12-23-crazy-address.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift2.ll2
-rw-r--r--test/CodeGen/X86/2009-02-25-CommuteBug.ll2
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll4
-rw-r--r--test/CodeGen/X86/2009-03-12-CPAlignBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-MultiUseSched.ll4
-rw-r--r--test/CodeGen/X86/2009-04-16-SpillerUnfold.ll2
-rw-r--r--test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll2
-rw-r--r--test/CodeGen/X86/2009-04-24.ll6
-rw-r--r--test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll2
-rw-r--r--test/CodeGen/X86/2009-05-30-ISelBug.ll2
-rw-r--r--test/CodeGen/X86/20090313-signext.ll4
-rw-r--r--test/CodeGen/X86/2010-01-19-OptExtBug.ll2
-rw-r--r--test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll2
-rw-r--r--test/CodeGen/X86/2010-05-12-FastAllocKills.ll2
-rw-r--r--test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll2
-rw-r--r--test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll4
-rw-r--r--test/CodeGen/X86/2011-04-19-sclr-bb.ll21
-rw-r--r--test/CodeGen/X86/2011-06-03-x87chain.ll2
-rw-r--r--test/CodeGen/X86/2011-06-12-FastAllocSpill.ll2
-rw-r--r--test/CodeGen/X86/2011-09-18-sse2cmp.ll2
-rw-r--r--test/CodeGen/X86/2011-09-21-setcc-bug.ll2
-rw-r--r--test/CodeGen/X86/2011-10-11-srl.ll2
-rw-r--r--test/CodeGen/X86/2011-12-15-vec_shift.ll4
-rw-r--r--test/CodeGen/X86/2012-02-20-MachineCPBug.ll2
-rw-r--r--test/CodeGen/X86/2012-03-26-PostRALICMBug.ll4
-rw-r--r--test/CodeGen/X86/2012-04-26-sdglue.ll3
-rw-r--r--test/CodeGen/X86/2012-05-17-TwoAddressBug.ll16
-rw-r--r--test/CodeGen/X86/2012-05-19-CoalescerCrash.ll122
-rw-r--r--test/CodeGen/X86/2012-05-19-avx2-store.ll13
-rw-r--r--test/CodeGen/X86/2012-07-10-extload64.ll32
-rw-r--r--test/CodeGen/X86/2012-07-10-shufnorm.ll17
-rw-r--r--test/CodeGen/X86/2012-07-15-broadcastfold.ll23
-rw-r--r--test/CodeGen/X86/2012-07-15-tconst_shl.ll9
-rw-r--r--test/CodeGen/X86/2012-07-15-vshl.ll31
-rw-r--r--test/CodeGen/X86/2012-07-16-LeaUndef.ll16
-rw-r--r--test/CodeGen/X86/2012-07-16-fp2ui-i1.ll12
-rw-r--r--test/CodeGen/X86/2012-07-17-vtrunc.ll16
-rw-r--r--test/CodeGen/X86/2012-07-23-select_cc.ll19
-rw-r--r--test/CodeGen/X86/2012-08-07-CmpISelBug.ll36
-rw-r--r--test/CodeGen/X86/4char-promote.ll9
-rw-r--r--test/CodeGen/X86/MachineSink-PHIUse.ll2
-rw-r--r--test/CodeGen/X86/add.ll10
-rw-r--r--test/CodeGen/X86/addr-label-difference.ll2
-rw-r--r--test/CodeGen/X86/aligned-comm.ll4
-rw-r--r--test/CodeGen/X86/alignment-2.ll4
-rw-r--r--test/CodeGen/X86/alloca-align-rounding-32.ll7
-rw-r--r--test/CodeGen/X86/alloca-align-rounding.ll7
-rw-r--r--test/CodeGen/X86/andimm8.ll2
-rw-r--r--test/CodeGen/X86/asm-reg-type-mismatch.ll (renamed from test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll)19
-rw-r--r--test/CodeGen/X86/atom-lea-sp.ll26
-rw-r--r--test/CodeGen/X86/atom-sched.ll3
-rw-r--r--test/CodeGen/X86/atomic_op.ll2
-rw-r--r--test/CodeGen/X86/avx-blend.ll2
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll57
-rw-r--r--test/CodeGen/X86/avx-minmax.ll2
-rwxr-xr-xtest/CodeGen/X86/avx-shuffle-x86_32.ll2
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll62
-rw-r--r--test/CodeGen/X86/avx-vbroadcast.ll29
-rwxr-xr-xtest/CodeGen/X86/avx2-conversions.ll68
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll179
-rw-r--r--test/CodeGen/X86/avx2-shuffle.ll28
-rw-r--r--test/CodeGen/X86/avx2-vbroadcast.ll178
-rw-r--r--test/CodeGen/X86/basic-promote-integers.ll4
-rw-r--r--test/CodeGen/X86/bigstructret.ll29
-rw-r--r--test/CodeGen/X86/blend-msb.ll2
-rw-r--r--test/CodeGen/X86/block-placement.ll10
-rw-r--r--test/CodeGen/X86/bool-simplify.ll42
-rw-r--r--test/CodeGen/X86/br-fold.ll2
-rw-r--r--test/CodeGen/X86/break-anti-dependencies.ll14
-rw-r--r--test/CodeGen/X86/break-sse-dep.ll7
-rw-r--r--test/CodeGen/X86/call-imm.ll8
-rw-r--r--test/CodeGen/X86/cfstring.ll2
-rw-r--r--test/CodeGen/X86/cmov-into-branch.ll63
-rw-r--r--test/CodeGen/X86/cmov.ll10
-rw-r--r--test/CodeGen/X86/cmp.ll61
-rw-r--r--test/CodeGen/X86/coalesce-esp.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute2.ll13
-rw-r--r--test/CodeGen/X86/coalescer-dce2.ll118
-rw-r--r--test/CodeGen/X86/coalescer-identity.ll82
-rw-r--r--test/CodeGen/X86/constant-pool-sharing.ll4
-rw-r--r--test/CodeGen/X86/constructor.ll27
-rw-r--r--test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll4
-rw-r--r--test/CodeGen/X86/crash.ll55
-rw-r--r--test/CodeGen/X86/ctpop-combine.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-cse.ll2
-rw-r--r--test/CodeGen/X86/dbg-merge-loc-entry.ll2
-rw-r--r--test/CodeGen/X86/dbg-value-range.ll1
-rw-r--r--test/CodeGen/X86/divide-by-constant.ll21
-rw-r--r--test/CodeGen/X86/dynamic-allocas-VLAs.ll237
-rw-r--r--test/CodeGen/X86/early-ifcvt.ll69
-rw-r--r--test/CodeGen/X86/epilogue.ll6
-rw-r--r--test/CodeGen/X86/extractps.ll4
-rw-r--r--test/CodeGen/X86/fabs.ll40
-rw-r--r--test/CodeGen/X86/fast-cc-merge-stack-adj.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-constpool.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-gv.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-mem.ll14
-rw-r--r--test/CodeGen/X86/fast-isel-x86.ll14
-rw-r--r--test/CodeGen/X86/fast-isel.ll12
-rw-r--r--test/CodeGen/X86/fastcc-byval.ll2
-rw-r--r--test/CodeGen/X86/fma.ll12
-rwxr-xr-xtest/CodeGen/X86/fma3-intrinsics.ll132
-rw-r--r--test/CodeGen/X86/fma4-intrinsics-x86_64.ll224
-rw-r--r--test/CodeGen/X86/fma_patterns.ll139
-rw-r--r--test/CodeGen/X86/fold-load.ll26
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-1.ll13
-rw-r--r--test/CodeGen/X86/force-align-stack-alloca.ll70
-rw-r--r--test/CodeGen/X86/fp-immediate-shorten.ll2
-rw-r--r--test/CodeGen/X86/fp-in-intregs.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-compare-cmov.ll12
-rw-r--r--test/CodeGen/X86/fp-stack-compare.ll7
-rw-r--r--test/CodeGen/X86/fp-stack-ret.ll2
-rw-r--r--test/CodeGen/X86/fp_load_fold.ll2
-rw-r--r--test/CodeGen/X86/full-lsr.ll16
-rw-r--r--test/CodeGen/X86/gather-addresses.ll4
-rw-r--r--test/CodeGen/X86/gs-fold.ll20
-rw-r--r--test/CodeGen/X86/h-register-addressing-32.ll2
-rw-r--r--test/CodeGen/X86/h-register-addressing-64.ll2
-rw-r--r--test/CodeGen/X86/h-registers-1.ll4
-rw-r--r--test/CodeGen/X86/hoist-invariant-load.ll2
-rw-r--r--test/CodeGen/X86/iabs.ll16
-rw-r--r--test/CodeGen/X86/illegal-vector-args-return.ll8
-rw-r--r--test/CodeGen/X86/inline-asm-error.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-modifier-n.ll2
-rw-r--r--test/CodeGen/X86/inline-asm.ll9
-rw-r--r--test/CodeGen/X86/inreg.ll46
-rw-r--r--test/CodeGen/X86/isel-sink2.ll2
-rw-r--r--test/CodeGen/X86/ispositive.ll2
-rw-r--r--test/CodeGen/X86/jump_sign.ll221
-rw-r--r--test/CodeGen/X86/label-redefinition.ll2
-rw-r--r--test/CodeGen/X86/large-global.ll11
-rw-r--r--test/CodeGen/X86/lea-2.ll2
-rw-r--r--test/CodeGen/X86/liveness-local-regalloc.ll34
-rw-r--r--test/CodeGen/X86/loop-blocks.ll4
-rw-r--r--test/CodeGen/X86/lsr-loop-exit-cond.ll17
-rw-r--r--test/CodeGen/X86/lsr-reuse-trunc.ll4
-rw-r--r--test/CodeGen/X86/lsr-static-addr.ll12
-rw-r--r--test/CodeGen/X86/machine-cse.ll57
-rw-r--r--test/CodeGen/X86/mem-promote-integers.ll4
-rw-r--r--test/CodeGen/X86/memcmp.ll3
-rw-r--r--test/CodeGen/X86/mmx-punpckhdq.ll2
-rw-r--r--test/CodeGen/X86/movgs.ll16
-rw-r--r--test/CodeGen/X86/multiple-loop-post-inc.ll10
-rw-r--r--test/CodeGen/X86/neg_cmp.ll22
-rw-r--r--test/CodeGen/X86/opt-shuff-tstore.ll2
-rw-r--r--test/CodeGen/X86/overlap-shift.ll2
-rw-r--r--test/CodeGen/X86/pass-three.ll16
-rw-r--r--test/CodeGen/X86/peep-vector-extract-insert.ll2
-rw-r--r--test/CodeGen/X86/phi-immediate-factoring.ll2
-rw-r--r--test/CodeGen/X86/phielim-split.ll30
-rw-r--r--test/CodeGen/X86/phys-reg-local-regalloc.ll20
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-3.ll6
-rw-r--r--test/CodeGen/X86/pmul.ll4
-rw-r--r--test/CodeGen/X86/pointer-vector.ll3
-rw-r--r--test/CodeGen/X86/pr11415.ll2
-rw-r--r--test/CodeGen/X86/pr11468.ll33
-rw-r--r--test/CodeGen/X86/pr12889.ll18
-rw-r--r--test/CodeGen/X86/pr13209.ll74
-rw-r--r--test/CodeGen/X86/pr13220.ll20
-rw-r--r--test/CodeGen/X86/pr13577.ll8
-rw-r--r--test/CodeGen/X86/pr2656.ll2
-rw-r--r--test/CodeGen/X86/pr3522.ll2
-rw-r--r--test/CodeGen/X86/promote-trunc.ll2
-rw-r--r--test/CodeGen/X86/rd-mod-wr-eflags.ll46
-rw-r--r--test/CodeGen/X86/rdrand.ll85
-rw-r--r--test/CodeGen/X86/regpressure.ll4
-rw-r--r--test/CodeGen/X86/remat-fold-load.ll143
-rw-r--r--test/CodeGen/X86/remat-scalar-zero.ll2
-rw-r--r--test/CodeGen/X86/reverse_branches.ll104
-rw-r--r--test/CodeGen/X86/rotate.ll2
-rw-r--r--test/CodeGen/X86/rounding-ops.ll4
-rw-r--r--test/CodeGen/X86/segmented-stacks-dynamic.ll12
-rw-r--r--test/CodeGen/X86/select.ll132
-rw-r--r--test/CodeGen/X86/selectiondag-cse.ll69
-rw-r--r--test/CodeGen/X86/sext-setcc-self.ll55
-rw-r--r--test/CodeGen/X86/shift-and.ll46
-rw-r--r--test/CodeGen/X86/shift-coalesce.ll4
-rw-r--r--test/CodeGen/X86/shift-double.ll2
-rw-r--r--test/CodeGen/X86/shift-folding.ll2
-rw-r--r--test/CodeGen/X86/shl_elim.ll6
-rw-r--r--test/CodeGen/X86/sincos.ll26
-rw-r--r--test/CodeGen/X86/sink-hoist.ll2
-rw-r--r--test/CodeGen/X86/sink-out-of-loop.ll54
-rw-r--r--test/CodeGen/X86/splat-scalar-load.ll2
-rw-r--r--test/CodeGen/X86/sse-align-12.ll2
-rw-r--r--test/CodeGen/X86/sse-domains.ll2
-rw-r--r--test/CodeGen/X86/sse-minmax.ll285
-rw-r--r--test/CodeGen/X86/sse3.ll7
-rw-r--r--test/CodeGen/X86/sse41-blend.ll2
-rw-r--r--test/CodeGen/X86/sse41.ll4
-rw-r--r--test/CodeGen/X86/sse4a.ll56
-rw-r--r--test/CodeGen/X86/sse_reload_fold.ll2
-rw-r--r--test/CodeGen/X86/stack-align.ll4
-rw-r--r--test/CodeGen/X86/stack-protector.ll (renamed from test/CodeGen/X86/stack-protector-linux.ll)4
-rw-r--r--test/CodeGen/X86/store_op_load_fold2.ll4
-rw-r--r--test/CodeGen/X86/subreg-to-reg-1.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-4.ll2
-rw-r--r--test/CodeGen/X86/switch-order-weight.ll37
-rw-r--r--test/CodeGen/X86/tailcall-64.ll96
-rw-r--r--test/CodeGen/X86/tailcall-cgp-dup.ll87
-rw-r--r--test/CodeGen/X86/tailcall-i1.ll6
-rw-r--r--test/CodeGen/X86/tailcall-largecode.ll10
-rw-r--r--test/CodeGen/X86/tailcall-void.ll6
-rw-r--r--test/CodeGen/X86/tailcall.ll (renamed from test/CodeGen/X86/tailcall1.ll)14
-rw-r--r--test/CodeGen/X86/tailcallbyval.ll2
-rw-r--r--test/CodeGen/X86/targetLoweringGeneric.ll38
-rw-r--r--test/CodeGen/X86/thiscall-struct-return.ll4
-rw-r--r--test/CodeGen/X86/tls-local-dynamic.ll59
-rw-r--r--test/CodeGen/X86/tls-models.ll166
-rw-r--r--test/CodeGen/X86/tls-pic.ll20
-rw-r--r--test/CodeGen/X86/tls-pie.ll20
-rw-r--r--test/CodeGen/X86/trap.ll16
-rw-r--r--test/CodeGen/X86/trunc-ext-ld-st.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce-2.ll4
-rw-r--r--test/CodeGen/X86/twoaddr-pass-sink.ll2
-rw-r--r--test/CodeGen/X86/uint_to_fp.ll2
-rw-r--r--test/CodeGen/X86/umul-with-carry.ll2
-rw-r--r--test/CodeGen/X86/unwindraise.ll252
-rw-r--r--test/CodeGen/X86/v-binop-widen2.ll9
-rw-r--r--test/CodeGen/X86/vec_call.ll4
-rw-r--r--test/CodeGen/X86/vec_cast2.ll49
-rw-r--r--test/CodeGen/X86/vec_compare-2.ll3
-rw-r--r--test/CodeGen/X86/vec_compare.ll2
-rw-r--r--test/CodeGen/X86/vec_ins_extract-1.ll2
-rw-r--r--test/CodeGen/X86/vec_insert-6.ll4
-rw-r--r--test/CodeGen/X86/vec_set-3.ll2
-rw-r--r--test/CodeGen/X86/vec_set-9.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-16.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-19.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-27.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-35.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-36.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-37.ll4
-rw-r--r--test/CodeGen/X86/vec_shuffle-38.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-39.ll2
-rw-r--r--test/CodeGen/X86/vec_splat-2.ll2
-rw-r--r--test/CodeGen/X86/vec_splat-3.ll2
-rw-r--r--test/CodeGen/X86/vec_splat-4.ll2
-rw-r--r--test/CodeGen/X86/vec_splat.ll4
-rw-r--r--test/CodeGen/X86/vec_ss_load_fold.ll14
-rw-r--r--test/CodeGen/X86/vshift-1.ll4
-rw-r--r--test/CodeGen/X86/vshift-2.ll6
-rw-r--r--test/CodeGen/X86/vshift-3.ll4
-rw-r--r--test/CodeGen/X86/vshift-5.ll8
-rw-r--r--test/CodeGen/X86/widen_arith-3.ll1
-rw-r--r--test/CodeGen/X86/widen_cast-1.ll13
-rw-r--r--test/CodeGen/X86/widen_cast-2.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-5.ll3
-rw-r--r--test/CodeGen/X86/widen_conv-4.ll2
-rw-r--r--test/CodeGen/X86/widen_extract-1.ll2
-rw-r--r--test/CodeGen/X86/widen_load-0.ll14
-rw-r--r--test/CodeGen/X86/win64_alloca_dynalloca.ll15
-rw-r--r--test/CodeGen/X86/x86-64-arg.ll2
-rw-r--r--test/CodeGen/X86/x86-64-dead-stack-adjust.ll4
-rw-r--r--test/CodeGen/X86/x86-64-pic-1.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-10.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-11.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-2.ll4
-rw-r--r--test/CodeGen/X86/x86-64-pic-3.ll4
-rw-r--r--test/CodeGen/X86/x86-64-pic-4.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-5.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-6.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-7.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-8.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-9.ll2
-rw-r--r--test/CodeGen/X86/xop-intrinsics-x86_64.ll80
-rw-r--r--test/CodeGen/X86/xor.ll8
-rw-r--r--test/CodeGen/XCore/mkmsk.ll11
632 files changed, 13078 insertions, 3245 deletions
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 33f935e960b1..a63cdd46e2d8 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 | grep r9
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
-; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer
+; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer
; | grep 35
define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) {
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index b543c57e1a85..8d3337c29fcf 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep {add.*#0}
+; RUN: llc < %s -march=arm | not grep "add.*#0"
define i32 @foo() {
entry:
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index d2eb85d356c5..670048bf25c4 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep {str.*\\!}
+; RUN: llc < %s -march=arm | not grep "str.*\!"
%struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
%struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index fd2f4620bceb..3754db01fdd1 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast -optimize-regalloc=0
; PR1925
%struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index 44da8e7905f5..5fbed0da5ce8 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast -optimize-regalloc=0
; PR1925
%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 352672274d20..7342f69631e6 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep {swi 107}
+; RUN: llc < %s -march=arm | grep "swi 107"
define i32 @_swilseek(i32) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
index 813bf3c360d3..7d4cc6e3a75a 100644
--- a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
+++ b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -regalloc=fast -verify-machineinstrs
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs
target triple = "arm-pc-linux-gnu"
; This test case would accidentally use the same physreg for two virtregs
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index 5ce600d1a939..b21bb006e327 100644
--- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
; Radar 10266272
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios4.0.0"
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index 872eca34ad11..f1c85f1b41f5 100644
--- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -60,8 +60,16 @@ for.end: ; preds = %entry
ret void
}
+; Check that pseudo-expansion preserves <undef> flags.
+define void @foo3(i8* %p) nounwind ssp {
+entry:
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4)
+ ret void
+}
+
declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
new file mode 100644
index 000000000000..b3a7e3444f36
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 < %s
+
+; CodeGen SplitCriticalEdge() shouldn't try to break edge to a landing pad.
+; rdar://11300144
+
+%0 = type opaque
+%class.FunctionInterpreter.3.15.31 = type { %class.Parser.1.13.29, %class.Parser.1.13.29*, %struct.ParserVariable.2.14.30*, i32 }
+%class.Parser.1.13.29 = type { i32 (...)**, %class.Parser.1.13.29* }
+%struct.ParserVariable.2.14.30 = type opaque
+%struct.ParseErrorMsg.0.12.28 = type { i32, i32, i32 }
+
+@_ZTI13ParseErrorMsg = external hidden unnamed_addr constant { i8*, i8* }
+@"OBJC_IVAR_$_MUMathExpressionDoubleBased.mInterpreter" = external hidden global i32, section "__DATA, __objc_ivar", align 4
+@"\01L_OBJC_SELECTOR_REFERENCES_14" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+define hidden double @t(%0* %self, i8* nocapture %_cmd) optsize ssp {
+entry:
+ %call = invoke double undef(%class.FunctionInterpreter.3.15.31* undef) optsize
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* bitcast ({ i8*, i8* }* @_ZTI13ParseErrorMsg to i8*)
+ br i1 undef, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.ParseErrorMsg.0.12.28*)*)(i8* undef, i8* undef, %struct.ParseErrorMsg.0.12.28* undef) optsize
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %entry
+ %value.0 = phi double [ 0x7FF8000000000000, %invoke.cont2 ], [ %call, %entry ]
+ ret double %value.0
+
+lpad1: ; preds = %catch
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %lpad1, %lpad
+ resume { i8*, i32 } undef
+
+terminate.lpad: ; preds = %lpad1
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ unreachable
+}
+
+declare i32 @__gxx_personality_sj0(...)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/ARM/2012-05-29-TailDupBug.ll b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll
new file mode 100644
index 000000000000..1a57f04f6458
--- /dev/null
+++ b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll
@@ -0,0 +1,140 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -verify-machineinstrs < %s
+
+; Teach taildup to update livein set to appease verifier.
+; rdar://11538365
+
+%struct.__CFString.2 = type opaque
+
+declare void @CFRelease(i8*)
+
+define hidden fastcc i32 @t() ssp {
+entry:
+ %mylocale.i.i = alloca [256 x i8], align 1
+ br i1 undef, label %return, label %CFStringIsHyphenationAvailableForLocale.exit
+
+CFStringIsHyphenationAvailableForLocale.exit: ; preds = %entry
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %CFStringIsHyphenationAvailableForLocale.exit
+ br i1 undef, label %if.end8.thread.i, label %if.then.i
+
+if.then.i: ; preds = %if.end
+ br i1 undef, label %if.end8.thread.i, label %if.end8.i
+
+if.end8.thread.i: ; preds = %if.then.i, %if.end
+ unreachable
+
+if.end8.i: ; preds = %if.then.i
+ br i1 undef, label %if.then11.i, label %__CFHyphenationPullTokenizer.exit
+
+if.then11.i: ; preds = %if.end8.i
+ unreachable
+
+__CFHyphenationPullTokenizer.exit: ; preds = %if.end8.i
+ br i1 undef, label %if.end68, label %if.then3
+
+if.then3: ; preds = %__CFHyphenationPullTokenizer.exit
+ br i1 undef, label %cond.end, label %cond.false
+
+cond.false: ; preds = %if.then3
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %if.then3
+ br i1 undef, label %while.end, label %while.body
+
+while.body: ; preds = %cond.end
+ unreachable
+
+while.end: ; preds = %cond.end
+ br i1 undef, label %if.end5.i, label %if.then.i16
+
+if.then.i16: ; preds = %while.end
+ br i1 undef, label %if.then4.i, label %if.end5.i
+
+if.then4.i: ; preds = %if.then.i16
+ br i1 false, label %cleanup.thread, label %if.end.i20
+
+if.end5.i: ; preds = %if.then.i16, %while.end
+ unreachable
+
+if.end.i20: ; preds = %if.then4.i
+ br label %for.body.i146.i
+
+for.body.i146.i: ; preds = %for.body.i146.i, %if.end.i20
+ br i1 undef, label %if.end20.i, label %for.body.i146.i
+
+if.end20.i: ; preds = %for.body.i146.i
+ br i1 undef, label %cleanup.thread, label %if.end23.i
+
+if.end23.i: ; preds = %if.end20.i
+ br label %for.body.i94.i
+
+for.body.i94.i: ; preds = %for.body.i94.i, %if.end23.i
+ br i1 undef, label %if.then28.i, label %for.body.i94.i
+
+if.then28.i: ; preds = %for.body.i94.i
+ br i1 undef, label %cond.true.i26, label %land.lhs.true
+
+cond.true.i26: ; preds = %if.then28.i
+ br label %land.lhs.true
+
+land.lhs.true: ; preds = %cond.true.i26, %if.then28.i
+ br i1 false, label %cleanup.thread, label %if.end35
+
+if.end35: ; preds = %land.lhs.true
+ br i1 undef, label %cleanup.thread, label %if.end45
+
+if.end45: ; preds = %if.end35
+ br i1 undef, label %if.then50, label %if.end.i37
+
+if.end.i37: ; preds = %if.end45
+ br label %if.then50
+
+if.then50: ; preds = %if.end.i37, %if.end45
+ br i1 undef, label %__CFHyphenationGetHyphensForString.exit, label %if.end.i
+
+if.end.i: ; preds = %if.then50
+ br i1 undef, label %cleanup.i, label %cond.true.i
+
+cond.true.i: ; preds = %if.end.i
+ br i1 undef, label %for.cond16.preheader.i, label %for.cond57.preheader.i
+
+for.cond16.preheader.i: ; preds = %cond.true.i
+ %cmp1791.i = icmp sgt i32 undef, 1
+ br i1 %cmp1791.i, label %for.body18.i, label %for.cond57.preheader.i
+
+for.cond57.preheader.i: ; preds = %for.cond16.preheader.i, %cond.true.i
+ %sub69.i = add i32 undef, -2
+ br label %cleanup.i
+
+for.body18.i: ; preds = %for.cond16.preheader.i
+ store i16 0, i16* undef, align 2
+ br label %while.body.i
+
+while.body.i: ; preds = %while.body.i, %for.body18.i
+ br label %while.body.i
+
+cleanup.i: ; preds = %for.cond57.preheader.i, %if.end.i
+ br label %__CFHyphenationGetHyphensForString.exit
+
+__CFHyphenationGetHyphensForString.exit: ; preds = %cleanup.i, %if.then50
+ %retval.1.i = phi i32 [ 0, %cleanup.i ], [ -1, %if.then50 ]
+ %phitmp = bitcast %struct.__CFString.2* null to i8*
+ br label %if.end68
+
+cleanup.thread: ; preds = %if.end35, %land.lhs.true, %if.end20.i, %if.then4.i
+ call void @llvm.stackrestore(i8* null)
+ br label %return
+
+if.end68: ; preds = %__CFHyphenationGetHyphensForString.exit, %__CFHyphenationPullTokenizer.exit
+ %hyphenCount.2 = phi i32 [ %retval.1.i, %__CFHyphenationGetHyphensForString.exit ], [ 0, %__CFHyphenationPullTokenizer.exit ]
+ %_token.1 = phi i8* [ %phitmp, %__CFHyphenationGetHyphensForString.exit ], [ undef, %__CFHyphenationPullTokenizer.exit ]
+ call void @CFRelease(i8* %_token.1)
+ br label %return
+
+return: ; preds = %if.end68, %cleanup.thread, %CFStringIsHyphenationAvailableForLocale.exit, %entry
+ %retval.1 = phi i32 [ %hyphenCount.2, %if.end68 ], [ -1, %CFStringIsHyphenationAvailableForLocale.exit ], [ -1, %cleanup.thread ], [ -1, %entry ]
+ ret i32 %retval.1
+}
+
+declare void @llvm.stackrestore(i8*) nounwind
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
new file mode 100644
index 000000000000..b05ec6367ee4
--- /dev/null
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -o /dev/null "-mtriple=thumbv7-apple-ios" -debug-only=post-RA-sched 2> %t
+; RUN: FileCheck %s < %t
+; REQUIRES: asserts
+; Make sure that mayalias store-load dependencies have one cycle
+; latency regardless of whether they are barriers or not.
+
+; CHECK: ** List Scheduling
+; CHECK: SU(2){{.*}}STR{{.*}}Volatile
+; CHECK-NOT: ch SU
+; CHECK: ch SU(3): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: SU(3){{.*}}LDR{{.*}}Volatile
+; CHECK-NOT: ch SU
+; CHECK: ch SU(2): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: ** List Scheduling
+; CHECK: SU(2){{.*}}STR{{.*}}
+; CHECK-NOT: ch SU
+; CHECK: ch SU(3): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: SU(3){{.*}}LDR{{.*}}
+; CHECK-NOT: ch SU
+; CHECK: ch SU(2): Latency=1
+; CHECK-NOT: ch SU
+define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
+entry:
+ store volatile i32 65540, i32* %p1, align 4, !tbaa !0
+ %0 = load volatile i32* %p2, align 4, !tbaa !0
+ ret i32 %0
+}
+
+define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind {
+entry:
+ store i32 65540, i32* %p1, align 4, !tbaa !0
+ %0 = load i32* %p2, align 4, !tbaa !0
+ ret i32 %0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
new file mode 100644
index 000000000000..e4ad45bf526e
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
@@ -0,0 +1,174 @@
+; RUN: llc < %s
+; PR13377
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+
+define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable {
+ br i1 undef, label %4, label %5
+
+; <label>:4 ; preds = %3
+ unreachable
+
+; <label>:5 ; preds = %3
+ br i1 undef, label %7, label %6
+
+; <label>:6 ; preds = %5
+ unreachable
+
+; <label>:7 ; preds = %5
+ br i1 undef, label %8, label %10
+
+; <label>:8 ; preds = %7
+ br i1 undef, label %9, label %10
+
+; <label>:9 ; preds = %8
+ br i1 undef, label %11, label %10
+
+; <label>:10 ; preds = %9, %8, %7
+ unreachable
+
+; <label>:11 ; preds = %9
+ br i1 undef, label %13, label %12
+
+; <label>:12 ; preds = %11
+ unreachable
+
+; <label>:13 ; preds = %11
+ br i1 undef, label %15, label %14
+
+; <label>:14 ; preds = %13
+ unreachable
+
+; <label>:15 ; preds = %13
+ br i1 undef, label %18, label %16
+
+; <label>:16 ; preds = %15
+ br i1 undef, label %17, label %18
+
+; <label>:17 ; preds = %16
+ unreachable
+
+; <label>:18 ; preds = %16, %15
+ br i1 undef, label %68, label %19
+
+; <label>:19 ; preds = %18
+ br label %20
+
+; <label>:20 ; preds = %20, %19
+ br i1 undef, label %21, label %20
+
+; <label>:21 ; preds = %20
+ br i1 undef, label %22, label %68
+
+; <label>:22 ; preds = %21
+ br i1 undef, label %23, label %24
+
+; <label>:23 ; preds = %22
+ unreachable
+
+; <label>:24 ; preds = %22
+ br i1 undef, label %26, label %25
+
+; <label>:25 ; preds = %24
+ unreachable
+
+; <label>:26 ; preds = %24
+ br i1 undef, label %28, label %27
+
+; <label>:27 ; preds = %26
+ unreachable
+
+; <label>:28 ; preds = %26
+ br i1 undef, label %29, label %30, !prof !0
+
+; <label>:29 ; preds = %28
+ br label %30
+
+; <label>:30 ; preds = %29, %28
+ br i1 undef, label %31, label %32, !prof !0
+
+; <label>:31 ; preds = %30
+ br label %32
+
+; <label>:32 ; preds = %31, %30
+ br i1 undef, label %34, label %33
+
+; <label>:33 ; preds = %32
+ unreachable
+
+; <label>:34 ; preds = %32
+ br i1 undef, label %35, label %36, !prof !0
+
+; <label>:35 ; preds = %34
+ br label %36
+
+; <label>:36 ; preds = %35, %34
+ br i1 undef, label %37, label %38, !prof !0
+
+; <label>:37 ; preds = %36
+ br label %38
+
+; <label>:38 ; preds = %37, %36
+ br i1 undef, label %39, label %67
+
+; <label>:39 ; preds = %38
+ br i1 undef, label %40, label %41
+
+; <label>:40 ; preds = %39
+ br i1 undef, label %64, label %41
+
+; <label>:41 ; preds = %40, %39
+ br i1 undef, label %64, label %42
+
+; <label>:42 ; preds = %41
+ %43 = fadd <4 x float> undef, undef
+ %44 = fadd <4 x float> undef, undef
+ %45 = fmul <4 x float> undef, undef
+ %46 = fmul <4 x float> %45, %43
+ %47 = fmul <4 x float> undef, %44
+ %48 = load <4 x float>* undef, align 8, !tbaa !1
+ %49 = bitcast <4 x float> %48 to <2 x i64>
+ %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1>
+ %51 = bitcast <1 x i64> %50 to <2 x float>
+ %52 = shufflevector <2 x float> %51, <2 x float> undef, <4 x i32> zeroinitializer
+ %53 = bitcast <4 x float> %52 to <2 x i64>
+ %54 = shufflevector <2 x i64> %53, <2 x i64> undef, <1 x i32> zeroinitializer
+ %55 = bitcast <1 x i64> %54 to <2 x float>
+ %56 = extractelement <2 x float> %55, i32 0
+ %57 = insertelement <4 x float> undef, float %56, i32 2
+ %58 = insertelement <4 x float> %57, float 1.000000e+00, i32 3
+ %59 = fsub <4 x float> %47, %58
+ %60 = fmul <4 x float> undef, undef
+ %61 = fmul <4 x float> %59, %60
+ %62 = fmul <4 x float> %61, <float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01>
+ %63 = fadd <4 x float> %47, %62
+ store <4 x float> %46, <4 x float>* undef, align 8, !tbaa !1
+ call arm_aapcs_vfpcc void @bar(%0* undef, float 0.000000e+00) nounwind
+ call arm_aapcs_vfpcc void @bar(%0* undef, float 0.000000e+00) nounwind
+ store <4 x float> %63, <4 x float>* undef, align 8, !tbaa !1
+ unreachable
+
+; <label>:64 ; preds = %41, %40
+ br i1 undef, label %65, label %66
+
+; <label>:65 ; preds = %64
+ unreachable
+
+; <label>:66 ; preds = %64
+ unreachable
+
+; <label>:67 ; preds = %38
+ unreachable
+
+; <label>:68 ; preds = %21, %18
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(%0*, float)
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
new file mode 100644
index 000000000000..bdcd1b6ad4b7
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+; PR13111
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv7-none-linux-gnueabi"
+
+define void @test_hi_char8() noinline {
+entry:
+ %0 = load <4 x i8>* undef, align 1
+ store <4 x i8> %0, <4 x i8>* null, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
new file mode 100644
index 000000000000..b55f1cae7fe6
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
+
+@var_v2i8 = global <2 x i8> zeroinitializer
+@var_v4i8 = global <4 x i8> zeroinitializer
+
+@var_v2i16 = global <2 x i16> zeroinitializer
+@var_v4i16 = global <4 x i16> zeroinitializer
+
+@var_v2i32 = global <2 x i32> zeroinitializer
+@var_v4i32 = global <4 x i32> zeroinitializer
+
+@var_v2i64 = global <2 x i64> zeroinitializer
+
+define void @test_v2i8tov2i32() {
+; CHECK: test_v2i8tov2i32:
+
+ %i8val = load <2 x i8>* @var_v2i8
+
+ %i32val = sext <2 x i8> %i8val to <2 x i32>
+ store <2 x i32> %i32val, <2 x i32>* @var_v2i32
+; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
+
+define void @test_v2i8tov2i64() {
+; CHECK: test_v2i8tov2i64:
+
+ %i8val = load <2 x i8>* @var_v2i8
+
+ %i64val = sext <2 x i8> %i8val to <2 x i64>
+ store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+; Sign-extending <2 x i8> to <2 x i64> takes one lane load and three vmovl steps.
+; LOAD is captured on the vld1 line so the vmovl.s8 check is tied to this
+; function's load rather than to the value bound in @test_v2i8tov2i32.
+; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.s32 {{q[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
+
+define void @test_v4i8tov4i16() {
+; CHECK: test_v4i8tov4i16:
+
+ %i8val = load <4 x i8>* @var_v4i8
+
+ %i16val = sext <4 x i8> %i8val to <4 x i16>
+ store <4 x i16> %i16val, <4 x i16>* @var_v4i16
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK-NOT: vmovl.s16
+
+ ret void
+; CHECK: bx lr
+}
+
+define void @test_v4i8tov4i32() {
+; CHECK: test_v4i8tov4i32:
+; Sign-extends <4 x i8> to <4 x i32>: a 32-bit lane load, then vmovl.s8 and vmovl.s16.
+ %i8val = load <4 x i8>* @var_v4i8
+
+ %i32val = sext <4 x i8> %i8val to <4 x i32>
+ store <4 x i32> %i32val, <4 x i32>* @var_v4i32
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
+
+define void @test_v2i16tov2i32() {
+; CHECK: test_v2i16tov2i32:
+
+ %i16val = load <2 x i16>* @var_v2i16
+
+ %i32val = sext <2 x i16> %i16val to <2 x i32>
+ store <2 x i32> %i32val, <2 x i32>* @var_v2i32
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
+; CHECK-NOT: vmovl
+
+ ret void
+; CHECK: bx lr
+}
+
+define void @test_v2i16tov2i64() {
+; CHECK: test_v2i16tov2i64:
+
+ %i16val = load <2 x i16>* @var_v2i16
+
+ %i64val = sext <2 x i16> %i16val to <2 x i64>
+ store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s32 {{q[0-9]+}}, d[[LOAD]]
+
+ ret void
+}
diff --git a/test/CodeGen/ARM/2012-08-13-bfi.ll b/test/CodeGen/ARM/2012-08-13-bfi.ll
new file mode 100644
index 000000000000..8263833d9874
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-13-bfi.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=thumb -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: foo
+; CHECK-NOT: bfi
+; CHECK: bx
+define i32 @foo(i8 zeroext %i) nounwind uwtable readnone ssp {
+ %1 = and i8 %i, 15
+ %2 = zext i8 %1 to i32
+ %3 = icmp ult i8 %1, 10
+ %4 = or i32 %2, 48
+ %5 = add nsw i32 %2, 55
+ %6 = select i1 %3, i32 %4, i32 %5
+ ret i32 %6
+}
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index 9ccff07d456b..6da90897b94b 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4
+; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4
define i32 @t1(i32 %a) {
%b = mul i32 %a, 9
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 31c500756c4e..d668334f8d6a 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
-; RUN: grep { = } %t | count 5
+; RUN: grep " = " %t | count 5
; RUN: grep globl %t | count 4
; RUN: grep weak %t | count 1
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index 396de37aef66..5e12d8e03555 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -57,3 +57,12 @@ store i64 %0, i64* @f3_var, align 4
store i64 %1, i64* @f3_var, align 4
ret void
}
+
+define i64 @f4(i64* %val) nounwind {
+entry:
+ ;CHECK: f4
+ ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r0]
+ ;CHECK: mov r0, [[REG1]]
+ %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind
+ ret i64 %0
+}
diff --git a/test/CodeGen/ARM/bicZext.ll b/test/CodeGen/ARM/bicZext.ll
new file mode 100644
index 000000000000..cf4b7ba0e044
--- /dev/null
+++ b/test/CodeGen/ARM/bicZext.ll
@@ -0,0 +1,19 @@
+; RUN: llc %s -o - | FileCheck %s
+; ModuleID = 'bic.c'
+target triple = "thumbv7-apple-ios3.0.0"
+
+define zeroext i16 @foo16(i16 zeroext %f) nounwind readnone optsize ssp {
+entry:
+ ; CHECK: .thumb_func _foo16
+ ; CHECK: {{bic[^#]*#3}}
+ %and = and i16 %f, -4
+ ret i16 %and
+}
+
+define i32 @foo32(i32 %f) nounwind readnone optsize ssp {
+entry:
+ ; CHECK: .thumb_func _foo32
+ ; CHECK: {{bic[^#]*#3}}
+ %and = and i32 %f, -4
+ ret i32 %and
+}
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index efe29d857d23..00b16888f389 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: not grep {bx lr}
+; RUN: not grep "bx lr"
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
@r = external global [14 x i32] ; <[14 x i32]*> [#uses=4]
diff --git a/test/CodeGen/ARM/cmn.ll b/test/CodeGen/ARM/cmn.ll
new file mode 100644
index 000000000000..ef73165366ad
--- /dev/null
+++ b/test/CodeGen/ARM/cmn.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple thumbv7-apple-ios | FileCheck %s
+; <rdar://problem/7569620>
+
+define i32 @compare_i_gt(i32 %a) {
+entry:
+; CHECK: compare_i_gt
+; CHECK-NOT: mvn
+; CHECK: cmn
+ %cmp = icmp sgt i32 %a, -78
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+}
+
+define i32 @compare_r_eq(i32 %a, i32 %b) {
+entry:
+; CHECK: compare_r_eq
+; CHECK: cmn
+ %sub = sub nsw i32 0, %b
+ %cmp = icmp eq i32 %a, %sub
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+}
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
new file mode 100644
index 000000000000..fb0f4c67c927
--- /dev/null
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; CHECK: f
+; The vld2 and vst2 are not aligned with respect to each other: the second Q
+; register loaded is the first one stored.
+; The coalescer must find a super-register larger than QQ to eliminate the copy
+; setting up the vst2 data.
+; CHECK: vld2
+; CHECK-NOT: vorr
+; CHECK-NOT: vmov
+; CHECK: vst2
+define void @f(float* %p, i32 %c) nounwind ssp {
+entry:
+ %0 = bitcast float* %p to i8*
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+ %add.ptr = getelementptr inbounds float* %p, i32 8
+ %1 = bitcast float* %add.ptr to i8*
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4)
+ ret void
+}
+
+; CHECK: f1
+; FIXME: This function still has copies.
+define void @f1(float* %p, i32 %c) nounwind ssp {
+entry:
+ %0 = bitcast float* %p to i8*
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+ %add.ptr = getelementptr inbounds float* %p, i32 8
+ %1 = bitcast float* %add.ptr to i8*
+ %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+ %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> %vld2215, i32 4)
+ ret void
+}
+
+; CHECK: f2
+; FIXME: This function still has copies.
+define void @f2(float* %p, i32 %c) nounwind ssp {
+entry:
+ %0 = bitcast float* %p to i8*
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld224 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %qq0.0.1.0 = phi <4 x float> [ %vld224, %entry ], [ %vld2216, %do.body ]
+ %c.addr.0 = phi i32 [ %c, %entry ], [ %dec, %do.body ]
+ %p.addr.0 = phi float* [ %p, %entry ], [ %add.ptr, %do.body ]
+ %add.ptr = getelementptr inbounds float* %p.addr.0, i32 8
+ %1 = bitcast float* %add.ptr to i8*
+ %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+ %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
+ %vld2216 = extractvalue { <4 x float>, <4 x float> } %vld22, 1
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %qq0.0.1.0, <4 x float> %vld2215, i32 4)
+ %dec = add nsw i32 %c.addr.0, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %do.end, label %do.body
+
+do.end: ; preds = %do.body
+ ret void
+}
+
+declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/crash-greedy.ll b/test/CodeGen/ARM/crash-greedy.ll
index 8a865e23d0a4..a3d49f620e9c 100644
--- a/test/CodeGen/ARM/crash-greedy.ll
+++ b/test/CodeGen/ARM/crash-greedy.ll
@@ -82,3 +82,49 @@ if.then195: ; preds = %if.then84
if.end251: ; preds = %if.then195, %if.then84, %entry
ret void
}
+
+; Coalescer failure: removeCopyByCommutingDef leaves a bad kill flag
+; behind.
+define void @rdar11950722() nounwind readonly optsize ssp align 2 {
+entry:
+ br i1 undef, label %land.lhs.true7, label %lor.lhs.false.i
+
+lor.lhs.false.i:
+ br i1 undef, label %if.then10.i, label %land.lhs.true7
+
+if.then10.i:
+ %xFlags.1.i = select i1 undef, i32 0, i32 undef
+ br i1 undef, label %land.lhs.true33.i, label %f.exit
+
+land.lhs.true33.i:
+ %and26.i = and i32 %xFlags.1.i, 8
+ %cmp27.i = icmp eq i32 %and26.i, 0
+ %and29.i = and i32 %xFlags.1.i, 2147483645
+ %xFlags.1.and29.i = select i1 %cmp27.i, i32 %xFlags.1.i, i32 %and29.i
+ %and34.i = and i32 %xFlags.1.i, 8
+ %cmp35.i = icmp eq i32 %and34.i, 0
+ %and37.i = and i32 %xFlags.1.i, 2147483645
+ %yFlags.1.and37.i = select i1 %cmp35.i, i32 %xFlags.1.i, i32 %and37.i
+ br label %f.exit
+
+f.exit:
+ %xFlags.3.i = phi i32 [ %xFlags.1.and29.i, %land.lhs.true33.i ], [ %xFlags.1.i, %if.then10.i ]
+ %yFlags.2.i = phi i32 [ %yFlags.1.and37.i, %land.lhs.true33.i ], [ %xFlags.1.i, %if.then10.i ]
+ %cmp40.i = icmp eq i32 %xFlags.3.i, %yFlags.2.i
+ br i1 %cmp40.i, label %land.lhs.true7, label %land.end
+
+land.lhs.true7:
+ br i1 undef, label %land.lhs.true34, label %lor.lhs.false27
+
+lor.lhs.false27:
+ br i1 undef, label %land.lhs.true34, label %land.end
+
+land.lhs.true34:
+ br i1 undef, label %land.end, label %lor.lhs.false44
+
+lor.lhs.false44:
+ ret void
+
+land.end:
+ ret void
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 1d011be93c3c..62b9e4380b2a 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
+; RUN: llc < %s -march=arm | grep "bl.*__ltdf" | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/ARM/data-in-code-annotations.ll b/test/CodeGen/ARM/data-in-code-annotations.ll
new file mode 100644
index 000000000000..a66a9d1292f0
--- /dev/null
+++ b/test/CodeGen/ARM/data-in-code-annotations.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+define double @f1() nounwind {
+; CHECK: f1:
+; CHECK: .data_region
+; CHECK: .long 1413754129
+; CHECK: .long 1074340347
+; CHECK: .end_data_region
+ ret double 0x400921FB54442D11
+}
+
+
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: .data_region jt32
+; CHECK: .end_data_region
+
+entry:
+ switch i32 undef, label %return [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb6
+ i32 3, label %sw.bb13
+ i32 4, label %sw.bb20
+ ]
+
+sw.bb: ; preds = %entry
+ br label %return
+
+sw.bb6: ; preds = %entry
+ br label %return
+
+sw.bb13: ; preds = %entry
+ br label %return
+
+sw.bb20: ; preds = %entry
+ %div = sdiv i32 undef, undef
+ br label %return
+
+return: ; preds = %sw.bb20, %sw.bb13, %sw.bb6, %sw.bb, %entry
+ %retval.0 = phi i32 [ %div, %sw.bb20 ], [ undef, %sw.bb13 ], [ undef, %sw.bb6 ], [ undef, %sw.bb ], [ 0, %entry ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 9bdae436de46..4f4ff8e81707 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -3,16 +3,17 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: vadd.f32 q4, q8, q8
-;CHECK-NEXT: Ltmp
-;CHECK-NEXT: @DEBUG_VALUE: y <- Q4+0
-;CHECK-NEXT: @DEBUG_VALUE: x <- Q4+0
+;CHECK-NEXT: Ltmp1
+
+;CHECK:@DEBUG_VALUE: x <- Q4+0
+;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0
@.str = external constant [13 x i8]
declare <4 x float> @test0001(float) nounwind readnone ssp
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+define i32 @main(i32 %argc, i8** nocapture %argv, i1 %cond) nounwind ssp {
entry:
br label %for.body9
@@ -21,7 +22,7 @@ for.body9: ; preds = %for.body9, %entry
tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
%add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39
- br i1 undef, label %for.end54, label %for.body9, !dbg !44
+ br i1 %cond, label %for.end54, label %for.body9, !dbg !44
for.end54: ; preds = %for.body9
%tmp115 = extractelement <4 x float> %add19, i32 1
@@ -52,7 +53,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
!8 = metadata !{metadata !9}
!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ]
!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !13}
!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
index 49c41037578c..7fbf8f409036 100644
--- a/test/CodeGen/ARM/divmod.ll
+++ b/test/CodeGen/ARM/divmod.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-ios5.0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s
define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
entry:
@@ -56,3 +56,17 @@ bb1:
declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone
declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
+
+; rdar://11714607
+define i32 @howmany(i32 %x, i32 %y) nounwind {
+entry:
+; CHECK: howmany:
+; CHECK: bl ___udivmodsi4
+; CHECK-NOT: ___udivsi3
+ %rem = urem i32 %x, %y
+ %div = udiv i32 %x, %y
+ %not.cmp = icmp ne i32 %rem, 0
+ %add = zext i1 %not.cmp to i32
+ %cond = add i32 %add, %div
+ ret i32 %cond
+}
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 45c322dce8b9..bcb4ee745234 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s -check-prefix=NFP0
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
define float @test(float %a, float %b) {
entry:
%dum = fadd float %a, %b
- %0 = tail call float @fabsf(float %dum)
+ %0 = tail call float @fabsf(float %dum) readnone
%dum1 = fadd float %0, %b
ret float %dum1
}
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
new file mode 100644
index 000000000000..14721a4d8024
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Fast-isel can't handle non-double multi-reg retvals.
+; This test just checks to make sure we don't hit the assert in FinishCall.
+define <16 x i8> @foo() nounwind ssp {
+entry:
+ ret <16 x i8> zeroinitializer
+}
+
+define void @t1() nounwind ssp {
+entry:
+; ARM: @t1
+; THUMB: @t1
+ %call = call <16 x i8> @foo()
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index dd460b2a0361..edc805a47d6a 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
define i32 @t0(i1 zeroext %a) nounwind {
%1 = zext i1 %a to i32
@@ -99,6 +101,11 @@ entry:
; ARM: uxtb r9, r12
; ARM: str r9, [sp, #4]
; ARM: bl _bar
+; ARM-LONG: @t10
+; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
+; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
+; ARM-LONG: ldr lr, [lr]
+; ARM-LONG: blx lr
; THUMB: @t10
; THUMB: movs r0, #0
; THUMB: movt r0, #0
@@ -121,8 +128,96 @@ entry:
; THUMB: uxtb.w r9, r12
; THUMB: str.w r9, [sp, #4]
; THUMB: bl _bar
+; THUMB-LONG: @t10
+; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
+; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
+; THUMB-LONG: ldr.w lr, [lr]
+; THUMB-LONG: blx lr
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
ret i32 0
}
declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
+
+define i32 @bar0(i32 %i) nounwind {
+ ret i32 0
+}
+
+define void @foo3() uwtable {
+; ARM: movw r0, #0
+; ARM: movw r1, :lower16:_bar0
+; ARM: movt r1, :upper16:_bar0
+; ARM: blx r1
+; THUMB: movs r0, #0
+; THUMB: movw r1, :lower16:_bar0
+; THUMB: movt r1, :upper16:_bar0
+; THUMB: blx r1
+ %fptr = alloca i32 (i32)*, align 8
+ store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
+ %1 = load i32 (i32)** %fptr, align 8
+ %call = call i32 %1(i32 0)
+ ret void
+}
+
+define i32 @LibCall(i32 %a, i32 %b) {
+entry:
+; ARM: LibCall
+; ARM: bl ___udivsi3
+; ARM-LONG: LibCall
+; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: ldr r2, [r2]
+; ARM-LONG: blx r2
+; THUMB: LibCall
+; THUMB: bl ___udivsi3
+; THUMB-LONG: LibCall
+; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: ldr r2, [r2]
+; THUMB-LONG: blx r2
+ %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @VarArg() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ %k = alloca i32, align 4
+ %m = alloca i32, align 4
+ %n = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %0 = load i32* %i, align 4
+ %1 = load i32* %j, align 4
+ %2 = load i32* %k, align 4
+ %3 = load i32* %m, align 4
+ %4 = load i32* %n, align 4
+; ARM: VarArg
+; ARM: mov r7, sp
+; ARM: movw r0, #5
+; ARM: ldr r1, [r7, #-4]
+; ARM: ldr r2, [r7, #-8]
+; ARM: ldr r3, [r7, #-12]
+; ARM: ldr r9, [sp, #16]
+; ARM: ldr r12, [sp, #12]
+; ARM: str r9, [sp]
+; ARM: str r12, [sp, #4]
+; ARM: bl _CallVariadic
+; THUMB: mov r7, sp
+; THUMB: movs r0, #5
+; THUMB: movt r0, #0
+; THUMB: ldr r1, [sp, #28]
+; THUMB: ldr r2, [sp, #24]
+; THUMB: ldr r3, [sp, #20]
+; THUMB: ldr.w r9, [sp, #16]
+; THUMB: ldr.w r12, [sp, #12]
+; THUMB: str.w r9, [sp]
+; THUMB: str.w r12, [sp, #4]
+; THUMB: bl _CallVariadic
+ %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
+ store i32 %call, i32* %tmp, align 4
+ %5 = load i32* %tmp, align 4
+ ret i32 %5
+}
+
+declare i32 @CallVariadic(i32, ...)
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
new file mode 100644
index 000000000000..8f7b2943b56d
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
+
+define i8* @frameaddr_index0() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index0:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+
+; DARWIN-THUMB2: frameaddr_index0:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+
+; LINUX-ARM: frameaddr_index0:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+
+; LINUX-THUMB2: frameaddr_index0:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+}
+
+define i8* @frameaddr_index1() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index1:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+; DARWIN-ARM: ldr r0, [r0]
+
+; DARWIN-THUMB2: frameaddr_index1:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+; DARWIN-THUMB2: ldr r0, [r0]
+
+; LINUX-ARM: frameaddr_index1:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+; LINUX-ARM: ldr r0, [r0]
+
+; LINUX-THUMB2: frameaddr_index1:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+; LINUX-THUMB2: ldr r0, [r0]
+
+ %0 = call i8* @llvm.frameaddress(i32 1)
+ ret i8* %0
+}
+
+define i8* @frameaddr_index3() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index3:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r0]
+
+; DARWIN-THUMB2: frameaddr_index3:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r0]
+
+; LINUX-ARM: frameaddr_index3:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r0]
+
+; LINUX-THUMB2: frameaddr_index3:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+; LINUX-THUMB2: ldr r0, [r0]
+; LINUX-THUMB2: ldr r0, [r0]
+; LINUX-THUMB2: ldr r0, [r0]
+
+ %0 = call i8* @llvm.frameaddress(i32 3)
+ ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index e6bdfa78d49b..b73fceff6cd0 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
@temp = common global [60 x i8] zeroinitializer, align 1
@@ -13,6 +15,11 @@ define void @t1() nounwind ssp {
; ARM: movw r2, #10
; ARM: uxtb r1, r1
; ARM: bl _memset
+; ARM-LONG: t1
+; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
; THUMB: t1
; THUMB: movw r0, :lower16:_message1
; THUMB: movt r0, :upper16:_message1
@@ -23,6 +30,11 @@ define void @t1() nounwind ssp {
; THUMB: movt r2, #0
; THUMB: uxtb r1, r1
; THUMB: bl _memset
+; THUMB-LONG: t1
+; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
ret void
}
@@ -41,6 +53,11 @@ define void @t2() nounwind ssp {
; ARM: mov r0, r1
; ARM: ldr r1, [sp] @ 4-byte Reload
; ARM: bl _memcpy
+; ARM-LONG: t2
+; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
; THUMB: t2
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
@@ -51,6 +68,11 @@ define void @t2() nounwind ssp {
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memcpy
+; THUMB-LONG: t2
+; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
ret void
}
@@ -67,6 +89,11 @@ define void @t3() nounwind ssp {
; ARM: movw r2, #10
; ARM: mov r0, r1
; ARM: bl _memmove
+; ARM-LONG: t3
+; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
; THUMB: t3
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
@@ -77,6 +104,11 @@ define void @t3() nounwind ssp {
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memmove
+; THUMB-LONG: t3
+; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/ARM/fast-isel-shifter.ll b/test/CodeGen/ARM/fast-isel-shifter.ll
new file mode 100644
index 000000000000..111818b289e8
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-shifter.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+
+define i32 @shl() nounwind ssp {
+entry:
+; ARM: shl
+; ARM: lsl r0, r0, #2
+ %shl = shl i32 -1, 2
+ ret i32 %shl
+}
+
+define i32 @shl_reg(i32 %src1, i32 %src2) nounwind ssp {
+entry:
+; ARM: shl_reg
+; ARM: lsl r0, r0, r1
+ %shl = shl i32 %src1, %src2
+ ret i32 %shl
+}
+
+define i32 @lshr() nounwind ssp {
+entry:
+; ARM: lshr
+; ARM: lsr r0, r0, #2
+ %lshr = lshr i32 -1, 2
+ ret i32 %lshr
+}
+
+define i32 @lshr_reg(i32 %src1, i32 %src2) nounwind ssp {
+entry:
+; ARM: lshr_reg
+; ARM: lsr r0, r0, r1
+ %lshr = lshr i32 %src1, %src2
+ ret i32 %lshr
+}
+
+define i32 @ashr() nounwind ssp {
+entry:
+; ARM: ashr
+; ARM: asr r0, r0, #2
+ %ashr = ashr i32 -1, 2
+ ret i32 %ashr
+}
+
+define i32 @ashr_reg(i32 %src1, i32 %src2) nounwind ssp {
+entry:
+; ARM: ashr_reg
+; ARM: asr r0, r0, r1
+ %ashr = ashr i32 %src1, %src2
+ ret i32 %ashr
+}
+
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 417e2d9e410a..ecd5fe27a4b7 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -226,3 +226,15 @@ define i32 @urem_fold(i32 %a) nounwind {
%rem = urem i32 %a, 32
ret i32 %rem
}
+
+define i32 @test7() noreturn nounwind {
+entry:
+; ARM: @test7
+; THUMB: @test7
+; ARM: trap
+; THUMB: trap
+ tail call void @llvm.trap( )
+ unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 27fa2b093d89..5511d24cb280 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -11,7 +11,7 @@ entry:
; HARD: test1:
; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
; HARD: vbsl [[REG1]], d
- %0 = tail call float @copysignf(float %x, float %y) nounwind
+ %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
ret float %0
}
@@ -25,7 +25,7 @@ entry:
; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000
; HARD: vshl.i64 [[REG2]], [[REG2]], #32
; HARD: vbsl [[REG2]], d1, d0
- %0 = tail call double @copysign(double %x, double %y) nounwind
+ %0 = tail call double @copysign(double %x, double %y) nounwind readnone
ret double %0
}
@@ -36,7 +36,7 @@ entry:
; SOFT: vshl.i64 [[REG3]], [[REG3]], #32
; SOFT: vbsl [[REG3]],
%0 = fmul double %x, %y
- %1 = tail call double @copysign(double %0, double %z) nounwind
+ %1 = tail call double @copysign(double %0, double %z) nounwind readnone
ret double %1
}
diff --git a/test/CodeGen/ARM/floorf.ll b/test/CodeGen/ARM/floorf.ll
new file mode 100644
index 000000000000..492fc36d2800
--- /dev/null
+++ b/test/CodeGen/ARM/floorf.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=arm-unknown-unknown < %s | FileCheck %s
+
+; CHECK: test1
+define float @test1() nounwind uwtable readnone ssp {
+; CHECK-NOT: floorf
+ %foo = call float @floorf(float 0x4000CCCCC0000000) nounwind readnone
+ ret float %foo
+}
+
+; CHECK: test2
+define float @test2() nounwind uwtable readnone ssp {
+; CHECK-NOT: ceilf
+ %foo = call float @ceilf(float 0x4000CCCCC0000000) nounwind readnone
+ ret float %foo
+}
+
+; CHECK: test3
+define float @test3() nounwind uwtable readnone ssp {
+; CHECK-NOT: truncf
+ %foo = call float @truncf(float 0x4000CCCCC0000000) nounwind readnone
+ ret float %foo
+}
+
+declare float @floorf(float) nounwind readnone
+declare float @ceilf(float) nounwind readnone
+declare float @truncf(float) nounwind readnone
+
+
+
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index bc118b8cb226..3c3182bc6341 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -21,3 +21,12 @@ entry:
; CORTEXA8: vmul.f32 d0, d1, d0
; CORTEXA9: test:
; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
+
+; VFP2: test2
+define float @test2(float %a) nounwind {
+; CHECK-NOT: mul
+; CHECK: mov pc, lr
+ %ret = fmul float %a, 1.0
+ ret float %ret
+}
+
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index ce6d6b29e9d5..40ea33becebb 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 | FileCheck %s
define float @f1(float %a, float %b) {
;CHECK: f1:
@@ -84,7 +84,7 @@ define float @f11(float %a) {
;CHECK: f11:
;CHECK: bic
entry:
- %tmp1 = call float @fabsf( float %a ) ; <float> [#uses=1]
+ %tmp1 = call float @fabsf( float %a ) readnone ; <float> [#uses=1]
ret float %tmp1
}
@@ -94,7 +94,7 @@ define double @f12(double %a) {
;CHECK: f12:
;CHECK: vabs.f64
entry:
- %tmp1 = call double @fabs( double %a ) ; <double> [#uses=1]
+ %tmp1 = call double @fabs( double %a ) readnone ; <double> [#uses=1]
ret double %tmp1
}
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
index 802d1b8b3932..303d165de0b6 100644
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
; Check generated fused MAC and MLS.
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
@@ -138,8 +138,16 @@ entry:
; CHECK: vfms.f64
%tmp1 = fsub double -0.0, %b
%tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
- %tmp3 = fsub double -0.0, %tmp2
- ret double %tmp3
+ ret double %tmp2
+}
+
+define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp {
+; CHECK: test_fnms_f32
+; CHECK: vfnms.f32
+ %tmp1 = load float* %c, align 4
+ %tmp2 = fsub float -0.0, %tmp1
+ %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
+ ret float %tmp3
}
define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
@@ -158,7 +166,8 @@ entry:
; CHECK: vfnms.f64
%tmp1 = fsub double -0.0, %b
%tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
- ret double %tmp2
+ %tmp3 = fsub double -0.0, %tmp2
+ ret double %tmp3
}
define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
@@ -180,6 +189,36 @@ entry:
ret double %tmp3
}
+define float @test_fma_const_fold(float %a, float %b) nounwind {
+; CHECK: test_fma_const_fold
+; CHECK-NOT: vfma
+; CHECK-NOT: vmul
+; CHECK: vadd
+ %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
+ ret float %ret
+}
+
+define float @test_fma_canonicalize(float %a, float %b) nounwind {
+; CHECK: test_fma_canonicalize
+; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
+; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
+ %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
+ ret float %ret
+}
+
+; Check that very wide vector fma's can be split into legal fma's.
+define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind readnone ssp {
+; CHECK: test_fma_v8f32
+; CHECK: vfma.f32
+; CHECK: vfma.f32
+entry:
+ %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
+ store <8 x float> %call, <8 x float>* %p, align 16
+ ret void
+}
+
+
declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index 89e309d16069..600a8c29ea91 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -10,7 +10,25 @@ define i32 @test(i32 %a) {
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
ret i32 %abs
-; CHECK: movs r0, r0
+; CHECK: cmp
; CHECK: rsbmi r0, r0, #0
; CHECK: bx lr
}
+
+; rdar://11633193
+;; 3 instructions will be generated for abs(a-b):
+;; subs
+;; rsbmi
+;; bx
+define i32 @test2(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK: test2
+; CHECK: subs
+; CHECK-NEXT: rsbmi
+; CHECK-NEXT: bx
+ %sub = sub nsw i32 %a, %b
+ %cmp = icmp sgt i32 %sub, -1
+ %sub1 = sub nsw i32 0, %sub
+ %cond = select i1 %cmp, i32 %sub, i32 %sub1
+ ret i32 %cond
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 3f8fd75f49f9..73b546d021d5 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3
; rdar://6949835
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
@@ -18,7 +18,6 @@ entry:
; M3: t:
; M3-NOT: ldrd
-; M3: ldm.w r2, {r2, r3}
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 8130019cbfd9..0c8d38748909 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]}
+; RUN: llc < %s -march=arm | grep lsl | grep -F "lsl #2]"
; Should use scaled addressing mode.
define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
index 991d728dddac..bbedea19d780 100644
--- a/test/CodeGen/ARM/movt-movw-global.ll
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=armv7-eabi | FileCheck %s -check-prefix=EABI
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=pic | FileCheck %s -check-prefix=IOS-PIC
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=static | FileCheck %s -check-prefix=IOS-STATIC
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-eabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=pic | FileCheck %s -check-prefix=IOS-PIC
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=static | FileCheck %s -check-prefix=IOS-STATIC
@foo = common global i32 0
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index de48feeb9ec2..4a82c36676f2 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s
define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
-;CHECK: vrecpe.f32
;CHECK: vmovn.i32
+;CHECK: vrecpe.f32
;CHECK: vmovn.i32
;CHECK: vmovn.i16
%tmp1 = load <8 x i8>* %A
@@ -15,10 +15,10 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
;CHECK: vrecps.f32
+;CHECK: vmovn.i32
;CHECK: vrecpe.f32
;CHECK: vrecps.f32
;CHECK: vmovn.i32
-;CHECK: vmovn.i32
;CHECK: vqmovun.s16
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
index b4da5524289f..df98e231ccfd 100644
--- a/test/CodeGen/ARM/opt-shuff-tstore.ll
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -mattr=+neon < %s | FileCheck %s
; CHECK: func_4_8
; CHECK: vst1.32
diff --git a/test/CodeGen/ARM/pr13249.ll b/test/CodeGen/ARM/pr13249.ll
new file mode 100644
index 000000000000..4bc881035497
--- /dev/null
+++ b/test/CodeGen/ARM/pr13249.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple armv7--linux-gnueabi
+
+define arm_aapcscc i8* @__strtok_r_1c(i8* %arg, i8 signext %arg1, i8** nocapture %arg2) nounwind {
+bb:
+ br label %bb3
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ]
+ %tmp4 = load i8* %tmp, align 1
+ %tmp5 = getelementptr inbounds i8* %tmp, i32 1
+ br i1 undef, label %bb3, label %bb7
+
+bb7: ; preds = %bb13, %bb3
+ %tmp8 = phi i8 [ %tmp14, %bb13 ], [ %tmp4, %bb3 ]
+ %tmp9 = phi i8* [ %tmp12, %bb13 ], [ %tmp, %bb3 ]
+ %tmp10 = icmp ne i8 %tmp8, %arg1
+ %tmp12 = getelementptr inbounds i8* %tmp9, i32 1
+ br i1 %tmp10, label %bb13, label %bb15
+
+bb13: ; preds = %bb7
+ %tmp14 = load i8* %tmp12, align 1
+ br label %bb7
+
+bb15: ; preds = %bb7
+ store i8* %tmp9, i8** %arg2, align 4
+ ret i8* %tmp
+}
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 3e07da841a5a..418d4f31ee2b 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -113,3 +113,29 @@ entry:
call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize
ret void
}
+
+; CHECK: f10
+define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatsisf
+ %1 = icmp eq i32 %a, %b
+ %2 = zext i1 %1 to i32
+ %3 = sitofp i32 %2 to float
+ ret float %3
+}
+
+; CHECK: f11
+define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatsisf
+ %1 = icmp eq i32 %a, %b
+ %2 = sitofp i1 %1 to float
+ ret float %2
+}
+
+; CHECK: f12
+define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatunsisf
+ %1 = icmp eq i32 %a, %b
+ %2 = uitofp i1 %1 to float
+ ret float %2
+}
+
diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll
new file mode 100644
index 000000000000..99df0d4c96b9
--- /dev/null
+++ b/test/CodeGen/ARM/smml.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+define i32 @f(i32 %a, i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+; CHECK-NOT: smmls
+ %conv4 = zext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %conv2 = sext i32 %c to i64
+ %mul = mul nsw i64 %conv2, %conv1
+ %shr5 = lshr i64 %mul, 32
+ %sub = sub nsw i64 %conv4, %shr5
+ %conv3 = trunc i64 %sub to i32
+ ret i32 %conv3
+}
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 983ba455e7b7..5ce2bcecb476 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,13 +1,12 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
-
-; The greedy register allocator uses a single CSR here, invalidating the test.
+; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
-; CHECK: push {lr}
-; CHECK: pop {lr}
+; CHECK: push {r4, r5, lr}
+; CHECK: pop {r4, r5, pc}
+ call void asm sideeffect "", "~{r4},~{r5}"() nounwind
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index e56e3f253e63..d8b3f0e767b5 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm | \
-; RUN: grep {str.*\\!} | count 2
+; RUN: grep "str.*\!" | count 2
define void @test1(i32* %X, i32* %A, i32** %dest) {
%B = load i32* %A ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll
new file mode 100644
index 000000000000..99ba475ad7b1
--- /dev/null
+++ b/test/CodeGen/ARM/struct_byval.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
+
+; rdar://9877866
+%struct.SmallStruct = type { i32, [8 x i32], [37 x i8] }
+%struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] }
+
+define i32 @f() nounwind ssp {
+entry:
+; CHECK: f:
+; CHECK: ldr
+; CHECK: str
+; CHECK-NOT:bne
+ %st = alloca %struct.SmallStruct, align 4
+ %call = call i32 @e1(%struct.SmallStruct* byval %st)
+ ret i32 0
+}
+
+; Generate a loop for large struct byval
+define i32 @g() nounwind ssp {
+entry:
+; CHECK: g:
+; CHECK: ldr
+; CHECK: sub
+; CHECK: str
+; CHECK: bne
+ %st = alloca %struct.LargeStruct, align 4
+ %call = call i32 @e2(%struct.LargeStruct* byval %st)
+ ret i32 0
+}
+
+; Generate a loop using NEON instructions
+define i32 @h() nounwind ssp {
+entry:
+; CHECK: h:
+; CHECK: vld1
+; CHECK: sub
+; CHECK: vst1
+; CHECK: bne
+ %st = alloca %struct.LargeStruct, align 16
+ %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st)
+ ret i32 0
+}
+
+declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind
+declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind
+declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind
diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll
new file mode 100644
index 000000000000..6fcbdee30d34
--- /dev/null
+++ b/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+
+define i32 @f(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: f:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp sgt i32 %a, %b
+ %sub = sub nsw i32 %a, %b
+ %sub. = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %sub.
+}
+
+define i32 @g(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: g:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp slt i32 %a, %b
+ %sub = sub nsw i32 %b, %a
+ %sub. = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %sub.
+}
+
+define i32 @h(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: h:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp sgt i32 %a, 3
+ %sub = sub nsw i32 %a, 3
+ %sub. = select i1 %cmp, i32 %sub, i32 %b
+ ret i32 %sub.
+}
+
+; rdar://11725965
+define i32 @i(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK: i:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp ult i32 %a, %b
+ %sub = sub i32 %b, %a
+ %sub. = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %sub.
+}
+; If CPSR is live-out, we can't remove cmp if there exists
+; a swapped sub.
+define i32 @j(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: j:
+; CHECK: sub
+; CHECK: cmp
+ %cmp = icmp eq i32 %b, %a
+ %sub = sub nsw i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %cmp2 = icmp sgt i32 %b, %a
+ %sel = select i1 %cmp2, i32 %sub, i32 %a
+ ret i32 %sel
+
+if.else:
+ ret i32 %sub
+}
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
index 06ea703fc74b..474043afc11d 100644
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@@ -36,3 +36,15 @@ entry:
%sel = select i1 %cmp, i32 1, i32 %sub
ret i32 %sel
}
+
+; rdar://11726136
+define i32 @f5(i32 %x) {
+entry:
+; CHECK: f5
+; CHECK: movw r1, #65535
+; CHECK-NOT: movt
+; CHECK-NOT: add
+; CHECK: sub r0, r0, r1
+ %sub = add i32 %x, -65535
+ ret i32 %sub
+}
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
index 3143387ead65..c403fa5c4a2a 100644
--- a/test/CodeGen/ARM/thread_pointer.ll
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {__aeabi_read_tp}
+; RUN: grep "__aeabi_read_tp"
define i8* @test() {
entry:
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index 28fd4696535f..a25352c0f03d 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -3,10 +3,10 @@
define i32 @test(i32 %a, i32 %b) {
entry:
-; CHECK: movs.w
+; CHECK: cmp
; CHECK-NEXT: it mi
; CHECK-NEXT: rsbmi
-; CHECK-NEXT: movs.w
+; CHECK-NEXT: cmp
; CHECK-NEXT: it mi
; CHECK-NEXT: rsbmi
%cmp1 = icmp slt i32 %a, 0
diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll
new file mode 100644
index 000000000000..a5f3c9005af0
--- /dev/null
+++ b/test/CodeGen/ARM/tls-models.ll
@@ -0,0 +1,117 @@
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s | FileCheck -check-prefix=CHECK-NONPIC %s
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s | FileCheck -check-prefix=CHECK-PIC %s
+
+
+@external_gd = external thread_local global i32
+@internal_gd = internal thread_local global i32 42
+
+@external_ld = external thread_local(localdynamic) global i32
+@internal_ld = internal thread_local(localdynamic) global i32 42
+
+@external_ie = external thread_local(initialexec) global i32
+@internal_ie = internal thread_local(initialexec) global i32 42
+
+@external_le = external thread_local(localexec) global i32
+@internal_le = internal thread_local(localexec) global i32 42
+
+; ----- no model specified -----
+
+define i32* @f1() {
+entry:
+ ret i32* @external_gd
+
+ ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
+ ; CHECK-NONPIC: f1:
+ ; CHECK-NONPIC: external_gd(gottpoff)
+ ; CHECK-PIC: f1:
+ ; CHECK-PIC: external_gd(tlsgd)
+}
+
+define i32* @f2() {
+entry:
+ ret i32* @internal_gd
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic,
+ ; but that is not implemented, so it falls back to general dynamic.
+ ; CHECK-NONPIC: f2:
+ ; CHECK-NONPIC: internal_gd(tpoff)
+ ; CHECK-PIC: f2:
+ ; CHECK-PIC: internal_gd(tlsgd)
+}
+
+
+; ----- localdynamic specified -----
+
+define i32* @f3() {
+entry:
+ ret i32* @external_ld
+
+ ; Non-PIC code can use initial exec, PIC should use local dynamic,
+ ; but that is not implemented, so it falls back to general dynamic.
+ ; CHECK-NONPIC: f3:
+ ; CHECK-NONPIC: external_ld(gottpoff)
+ ; CHECK-PIC: f3:
+ ; CHECK-PIC: external_ld(tlsgd)
+}
+
+define i32* @f4() {
+entry:
+ ret i32* @internal_ld
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic,
+ ; but that is not implemented, so it falls back to general dynamic.
+ ; CHECK-NONPIC: f4:
+ ; CHECK-NONPIC: internal_ld(tpoff)
+ ; CHECK-PIC: f4:
+ ; CHECK-PIC: internal_ld(tlsgd)
+}
+
+
+; ----- initialexec specified -----
+
+define i32* @f5() {
+entry:
+ ret i32* @external_ie
+
+ ; Non-PIC and PIC code will use initial exec as specified.
+ ; CHECK-NONPIC: f5:
+ ; CHECK-NONPIC: external_ie(gottpoff)
+ ; CHECK-PIC: f5:
+ ; CHECK-PIC: external_ie(gottpoff)
+}
+
+define i32* @f6() {
+entry:
+ ret i32* @internal_ie
+
+ ; Non-PIC code can use local exec, PIC code uses initial exec as specified.
+ ; CHECK-NONPIC: f6:
+ ; CHECK-NONPIC: internal_ie(tpoff)
+ ; CHECK-PIC: f6:
+ ; CHECK-PIC: internal_ie(gottpoff)
+}
+
+
+; ----- localexec specified -----
+
+define i32* @f7() {
+entry:
+ ret i32* @external_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; CHECK-NONPIC: f7:
+ ; CHECK-NONPIC: external_le(tpoff)
+ ; CHECK-PIC: f7:
+ ; CHECK-PIC: external_le(tpoff)
+}
+
+define i32* @f8() {
+entry:
+ ret i32* @internal_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; CHECK-NONPIC: f8:
+ ; CHECK-NONPIC: internal_le(tpoff)
+ ; CHECK-PIC: f8:
+ ; CHECK-PIC: internal_le(tpoff)
+}
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index 1087094e5798..ec4278ce72f6 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {i(tpoff)}
+; RUN: grep "i(tpoff)"
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {__aeabi_read_tp}
+; RUN: grep "__aeabi_read_tp"
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
-; RUN: -relocation-model=pic | grep {__tls_get_addr}
+; RUN: -relocation-model=pic | grep "__tls_get_addr"
@i = thread_local global i32 15 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index df7a4ca02db8..e0e944f70c5d 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {tbss}
+; RUN: grep "tbss"
%struct.anon = type { i32, i32 }
@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1]
diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll
new file mode 100644
index 000000000000..4e227dd5be36
--- /dev/null
+++ b/test/CodeGen/ARM/twoaddrinstr.ll
@@ -0,0 +1,21 @@
+; Tests for the two-address instruction pass.
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
+
+define void @PR13378() nounwind {
+; This was originally a crasher trying to schedule the instructions.
+; CHECK: PR13378:
+; CHECK: vldmia
+; CHECK-NEXT: vmov.f32
+; CHECK-NEXT: vstmia
+; CHECK-NEXT: vstmia
+; CHECK-NEXT: vmov.f32
+; CHECK-NEXT: vstmia
+
+entry:
+ %0 = load <4 x float>* undef
+ store <4 x float> zeroinitializer, <4 x float>* undef
+ store <4 x float> %0, <4 x float>* undef
+ %1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3
+ store <4 x float> %1, <4 x float>* undef
+ unreachable
+}
diff --git a/test/CodeGen/ARM/unsafe-fsub.ll b/test/CodeGen/ARM/unsafe-fsub.ll
new file mode 100644
index 000000000000..3a4477d31562
--- /dev/null
+++ b/test/CodeGen/ARM/unsafe-fsub.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s
+; RUN: llc -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=FAST %s
+
+target triple = "armv7-apple-ios"
+
+; SAFE: test
+; FAST: test
+define float @test(float %x, float %y) {
+entry:
+; SAFE: vmul.f32
+; SAFE: vsub.f32
+; FAST: mov r0, #0
+ %0 = fmul float %x, %y
+ %1 = fsub float %0, %0
+ ret float %1
+}
+
+
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 450f90d03dfe..9f55c24b4029 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,79 +1,80 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; NB: this tests vcnt, vclz, and vcls
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK: vcnt8:
-;CHECK: vcnt.8
+;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
+ %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK: vcntQ8:
-;CHECK: vcnt.8
+;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
+ %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
-declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK: vclz8:
-;CHECK: vclz.i8
+;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+ %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
ret <8 x i8> %tmp2
}
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK: vclz16:
-;CHECK: vclz.i16
+;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+ %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
ret <4 x i16> %tmp2
}
define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK: vclz32:
-;CHECK: vclz.i32
+;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+ %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
ret <2 x i32> %tmp2
}
define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK: vclzQ8:
-;CHECK: vclz.i8
+;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+ %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
ret <16 x i8> %tmp2
}
define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK: vclzQ16:
-;CHECK: vclz.i16
+;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+ %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
ret <8 x i16> %tmp2
}
define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK: vclzQ32:
-;CHECK: vclz.i32
+;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+ %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
ret <4 x i32> %tmp2
}
-declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK: vclss8:
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
index 1ec36da38f77..8fd3db29197e 100644
--- a/test/CodeGen/ARM/vector-extend-narrow.ll
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -20,7 +20,9 @@ define float @f(<4 x i16>* nocapture %in) {
; CHECK: g:
define float @g(<4 x i8>* nocapture %in) {
- ; CHECK: vldr
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores
+ ; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
%1 = load <4 x i8>* %in
@@ -47,7 +49,9 @@ define <4 x i8> @h(<4 x float> %v) {
; CHECK: i:
define <4 x i8> @i(<4 x i8>* %x) {
- ; CHECK: vldr
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores
+ ; CHECK: vld1
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vrecpe
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 49a69827bc05..7a4b34f4a3f0 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
define void @test(float* %P, double* %D) {
%A = load float* %P ; <float> [#uses=1]
@@ -17,11 +17,11 @@ define void @test_abs(float* %P, double* %D) {
;CHECK: test_abs:
%a = load float* %P ; <float> [#uses=1]
;CHECK: vabs.f32
- %b = call float @fabsf( float %a ) ; <float> [#uses=1]
+ %b = call float @fabsf( float %a ) readnone ; <float> [#uses=1]
store float %b, float* %P
%A = load double* %D ; <double> [#uses=1]
;CHECK: vabs.f64
- %B = call double @fabs( double %A ) ; <double> [#uses=1]
+ %B = call double @fabs( double %A ) readnone ; <double> [#uses=1]
store double %B, double* %D
ret void
}
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 61d73c15f31f..c69473f87f98 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -75,12 +75,12 @@ define <8 x i8> @vld2dupi8(i8* %A) nounwind {
ret <8 x i8> %tmp5
}
-define <4 x i16> @vld2dupi16(i16* %A) nounwind {
+define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;CHECK: vld2dupi16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld2.16 {d16[], d17[]}, [r0]
- %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -94,7 +94,8 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
;CHECK: vld2dupi16_update:
;CHECK: vld2.16 {d16[], d17[]}, [r1]!
%A = load i16** %ptr
- %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %A2 = bitcast i16* %A to i8*
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -105,11 +106,11 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
ret <4 x i16> %tmp5
}
-define <2 x i32> @vld2dupi32(i32* %A) nounwind {
+define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d16[], d17[]}, [r0, :64]
- %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+ %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
@@ -119,8 +120,8 @@ define <2 x i32> @vld2dupi32(i32* %A) nounwind {
}
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -144,11 +145,11 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
ret <8 x i8> %tmp8
}
-define <4 x i16> @vld3dupi16(i16* %A) nounwind {
+define <4 x i16> @vld3dupi16(i8* %A) nounwind {
;CHECK: vld3dupi16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
- %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+ %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
@@ -161,7 +162,7 @@ define <4 x i16> @vld3dupi16(i16* %A) nounwind {
}
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
@@ -171,7 +172,8 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
;CHECK: vld4dupi16_update:
;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
%A = load i16** %ptr
- %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+ %A2 = bitcast i16* %A to i8*
+ %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
%tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
@@ -188,12 +190,12 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
ret <4 x i16> %tmp11
}
-define <2 x i32> @vld4dupi32(i32* %A) nounwind {
+define <2 x i32> @vld4dupi32(i8* %A) nounwind {
;CHECK: vld4dupi32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64]
- %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+ %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
@@ -208,5 +210,5 @@ define <2 x i32> @vld4dupi32(i32* %A) nounwind {
ret <2 x i32> %tmp11
}
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 61d89bbae835..74628f0c5ce6 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -525,3 +525,77 @@ define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
%3 = extractelement <8 x i16> %2, i32 0
ret i16 %3
}
+
+; A constant build_vector created for a vmull with half-width elements must
+; not introduce illegal types. <rdar://problem/11324364>
+define void @vmull_buildvector() nounwind optsize ssp align 2 {
+; CHECK: vmull_buildvector
+entry:
+ br i1 undef, label %for.end179, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.cond.loopexit: ; preds = %for.body33, %for.body
+ br i1 undef, label %for.end179, label %for.body
+
+for.body: ; preds = %for.cond.loopexit, %for.body.lr.ph
+ br i1 undef, label %for.cond.loopexit, label %for.body33.lr.ph
+
+for.body33.lr.ph: ; preds = %for.body
+ %.sub = select i1 undef, i32 0, i32 undef
+ br label %for.body33
+
+for.body33: ; preds = %for.body33, %for.body33.lr.ph
+ %add45 = add i32 undef, undef
+ %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1)
+ %0 = load i32** undef, align 4
+ %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
+ %vmovl.i249 = zext <8 x i8> %1 to <8 x i16>
+ %shuffle.i246 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i240 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> <i32 1>
+ %2 = bitcast <1 x i64> %shuffle.i240 to <8 x i8>
+ %3 = bitcast <16 x i8> undef to <2 x i64>
+ %vmovl.i237 = zext <8 x i8> undef to <8 x i16>
+ %shuffle.i234 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i226 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %vmovl.i225 = zext <8 x i8> undef to <8 x i16>
+ %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249
+ %vshl_n = shl <8 x i16> %mul.i223, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vshl_n) nounwind
+ %mul.i209 = mul <8 x i16> undef, <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>
+ %vshr_n130 = lshr <8 x i16> undef, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vshr_n134 = lshr <8 x i16> %mul.i209, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %sub.i205 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n130
+ %sub.i203 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n134
+ %add.i200 = add <8 x i16> %sub.i205, <i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96>
+ %add.i198 = add <8 x i16> %add.i200, %sub.i203
+ %mul.i194 = mul <8 x i16> %add.i198, %vmovl.i237
+ %mul.i191 = mul <8 x i16> %vshr_n130, undef
+ %add.i192 = add <8 x i16> %mul.i191, %mul.i194
+ %mul.i187 = mul <8 x i16> %vshr_n134, undef
+ %add.i188 = add <8 x i16> %mul.i187, %add.i192
+ %mul.i185 = mul <8 x i16> undef, undef
+ %add.i186 = add <8 x i16> %mul.i185, undef
+ %vrshr_n160 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i188, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
+ %vrshr_n163 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i186, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
+ %mul.i184 = mul <8 x i16> undef, %vrshr_n160
+ %mul.i181 = mul <8 x i16> undef, %vmovl.i225
+ %add.i182 = add <8 x i16> %mul.i181, %mul.i184
+ %vrshr_n170 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i182, <8 x i16> <i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7>)
+ %vqmovn1.i180 = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %vrshr_n170) nounwind
+ %4 = bitcast <8 x i8> %vqmovn1.i180 to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %4, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %5 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ store <16 x i8> %5, <16 x i8>* undef, align 16
+ %add177 = add nsw i32 undef, 16
+ br i1 undef, label %for.body33, label %for.cond.loopexit
+
+for.end179: ; preds = %for.cond.loopexit, %entry
+ ret void
+}
+
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index e3372a03793d..f117ab205d41 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -fast-isel=0 -O0 | FileCheck %s
define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3i8:
diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll
index 71fea12d9c2c..16e9798481fe 100644
--- a/test/CodeGen/CPP/2007-06-16-Funcname.ll
+++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll
@@ -5,3 +5,4 @@ define void @foo() {
ret void
}
+
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
index c14fd7ba4a46..f6b028dbb88a 100644
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -1,4 +1,4 @@
-; RUN: llc --march=cellspu %s -o - | FileCheck %s
+; RUN: llc --mtriple=cellspu-unknown-elf %s -o - | FileCheck %s
; Exercise the floating point comparison operators for f32:
@@ -15,8 +15,8 @@ define i1 @fcmp_eq(float %arg1, float %arg2) {
define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
; CHECK: fcmeq
; CHECK: bi $lr
- %1 = call float @fabsf(float %arg1)
- %2 = call float @fabsf(float %arg2)
+ %1 = call float @fabsf(float %arg1) readnone
+ %2 = call float @fabsf(float %arg2) readnone
%3 = fcmp oeq float %1, %2
ret i1 %3
}
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 1e5e3b341440..6e01906dae69 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -32,11 +32,11 @@ declare double @fabs(double)
declare float @fabsf(float)
define double @fabs_dp(double %X) {
- %Y = call double @fabs( double %X )
+ %Y = call double @fabs( double %X ) readnone
ret double %Y
}
define float @fabs_sp(float %X) {
- %Y = call float @fabsf( float %X )
+ %Y = call float @fabsf( float %X ) readnone
ret float %Y
}
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
index 32b12617cfc3..2f9b091faea3 100644
--- a/test/CodeGen/CellSPU/icmp16.ll
+++ b/test/CodeGen/CellSPU/icmp16.ll
@@ -1,14 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ilh %t1.s | count 15
-; RUN: grep ceqh %t1.s | count 29
-; RUN: grep ceqhi %t1.s | count 13
-; RUN: grep clgth %t1.s | count 15
-; RUN: grep cgth %t1.s | count 14
-; RUN: grep cgthi %t1.s | count 6
-; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 17
-; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 6
+; RUN: llc < %s -march=cellspu | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -27,6 +17,10 @@ target triple = "spu"
; i16 integer comparisons:
define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_eq_select_i16:
+; CHECK: ceqh
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp eq i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -34,12 +28,22 @@ entry:
}
define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_eq_setcc_i16:
+; CHECK: ilhu
+; CHECK: ceqh
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp eq i16 %arg1, %arg2
ret i1 %A
}
define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_eq_immed01_i16:
+; CHECK: ceqhi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i16 %arg1, 511
%B = select i1 %A, i16 %val1, i16 %val2
@@ -47,6 +51,10 @@ entry:
}
define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_eq_immed02_i16:
+; CHECK: ceqhi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i16 %arg1, -512
%B = select i1 %A, i16 %val1, i16 %val2
@@ -54,6 +62,10 @@ entry:
}
define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_eq_immed03_i16:
+; CHECK: ceqhi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i16 %arg1, -1
%B = select i1 %A, i16 %val1, i16 %val2
@@ -61,6 +73,11 @@ entry:
}
define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_eq_immed04_i16:
+; CHECK: ilh
+; CHECK: ceqh
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i16 %arg1, 32768
%B = select i1 %A, i16 %val1, i16 %val2
@@ -68,6 +85,10 @@ entry:
}
define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ne_select_i16:
+; CHECK: ceqh
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp ne i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -75,12 +96,23 @@ entry:
}
define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ne_setcc_i16:
+; CHECK: ceqh
+; CHECK: ilhu
+; CHECK: xorhi
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ne i16 %arg1, %arg2
ret i1 %A
}
define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ne_immed01_i16:
+; CHECK: ceqhi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i16 %arg1, 511
%B = select i1 %A, i16 %val1, i16 %val2
@@ -88,6 +120,10 @@ entry:
}
define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ne_immed02_i16:
+; CHECK: ceqhi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i16 %arg1, -512
%B = select i1 %A, i16 %val1, i16 %val2
@@ -95,6 +131,10 @@ entry:
}
define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ne_immed03_i16:
+; CHECK: ceqhi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i16 %arg1, -1
%B = select i1 %A, i16 %val1, i16 %val2
@@ -102,6 +142,11 @@ entry:
}
define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ne_immed04_i16:
+; CHECK: ilh
+; CHECK: ceqh
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i16 %arg1, 32768
%B = select i1 %A, i16 %val1, i16 %val2
@@ -109,6 +154,10 @@ entry:
}
define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ugt_select_i16:
+; CHECK: clgth
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp ugt i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -116,12 +165,22 @@ entry:
}
define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ugt_setcc_i16:
+; CHECK: ilhu
+; CHECK: clgth
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ugt i16 %arg1, %arg2
ret i1 %A
}
define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ugt_immed01_i16:
+; CHECK: clgthi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i16 %arg1, 500
%B = select i1 %A, i16 %val1, i16 %val2
@@ -129,6 +188,10 @@ entry:
}
define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ugt_immed02_i16:
+; CHECK: ceqhi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ugt i16 %arg1, 0
%B = select i1 %A, i16 %val1, i16 %val2
@@ -136,6 +199,10 @@ entry:
}
define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ugt_immed03_i16:
+; CHECK: clgthi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i16 %arg1, 65024
%B = select i1 %A, i16 %val1, i16 %val2
@@ -143,6 +210,11 @@ entry:
}
define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ugt_immed04_i16:
+; CHECK: ilh
+; CHECK: clgth
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i16 %arg1, 32768
%B = select i1 %A, i16 %val1, i16 %val2
@@ -150,6 +222,12 @@ entry:
}
define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_uge_select_i16:
+; CHECK: ceqh
+; CHECK: clgth
+; CHECK: or
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp uge i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -157,6 +235,14 @@ entry:
}
define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_uge_setcc_i16:
+; CHECK: ceqh
+; CHECK: clgth
+; CHECK: ilhu
+; CHECK: or
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp uge i16 %arg1, %arg2
ret i1 %A
@@ -169,6 +255,12 @@ entry:
;; they'll ever be generated.
define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ult_select_i16:
+; CHECK: ceqh
+; CHECK: clgth
+; CHECK: nor
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp ult i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -176,12 +268,26 @@ entry:
}
define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ult_setcc_i16:
+; CHECK: ceqh
+; CHECK: clgth
+; CHECK: ilhu
+; CHECK: nor
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ult i16 %arg1, %arg2
ret i1 %A
}
define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ult_immed01_i16:
+; CHECK: ceqhi
+; CHECK: clgthi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i16 %arg1, 511
%B = select i1 %A, i16 %val1, i16 %val2
@@ -189,6 +295,12 @@ entry:
}
define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ult_immed02_i16:
+; CHECK: ceqhi
+; CHECK: clgthi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i16 %arg1, 65534
%B = select i1 %A, i16 %val1, i16 %val2
@@ -196,6 +308,12 @@ entry:
}
define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ult_immed03_i16:
+; CHECK: ceqhi
+; CHECK: clgthi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i16 %arg1, 65024
%B = select i1 %A, i16 %val1, i16 %val2
@@ -203,6 +321,13 @@ entry:
}
define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ult_immed04_i16:
+; CHECK: ilh
+; CHECK: ceqh
+; CHECK: clgth
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i16 %arg1, 32769
%B = select i1 %A, i16 %val1, i16 %val2
@@ -210,6 +335,10 @@ entry:
}
define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ule_select_i16:
+; CHECK: clgth
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp ule i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -217,6 +346,13 @@ entry:
}
define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_ule_setcc_i16:
+; CHECK: clgth
+; CHECK: ilhu
+; CHECK: xorhi
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ule i16 %arg1, %arg2
ret i1 %A
@@ -229,6 +365,10 @@ entry:
;; they'll ever be generated.
define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sgt_select_i16:
+; CHECK: cgth
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp sgt i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -236,12 +376,22 @@ entry:
}
define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sgt_setcc_i16:
+; CHECK: ilhu
+; CHECK: cgth
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp sgt i16 %arg1, %arg2
ret i1 %A
}
define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sgt_immed01_i16:
+; CHECK: cgthi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i16 %arg1, 511
%B = select i1 %A, i16 %val1, i16 %val2
@@ -249,6 +399,10 @@ entry:
}
define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sgt_immed02_i16:
+; CHECK: cgthi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i16 %arg1, -1
%B = select i1 %A, i16 %val1, i16 %val2
@@ -256,6 +410,10 @@ entry:
}
define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sgt_immed03_i16:
+; CHECK: cgthi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i16 %arg1, -512
%B = select i1 %A, i16 %val1, i16 %val2
@@ -263,6 +421,11 @@ entry:
}
define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sgt_immed04_i16:
+; CHECK: ilh
+; CHECK: ceqh
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp sgt i16 %arg1, 32768
%B = select i1 %A, i16 %val1, i16 %val2
@@ -270,6 +433,12 @@ entry:
}
define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sge_select_i16:
+; CHECK: ceqh
+; CHECK: cgth
+; CHECK: or
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp sge i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -277,6 +446,14 @@ entry:
}
define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sge_setcc_i16:
+; CHECK: ceqh
+; CHECK: cgth
+; CHECK: ilhu
+; CHECK: or
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp sge i16 %arg1, %arg2
ret i1 %A
@@ -289,6 +466,12 @@ entry:
;; they'll ever be generated.
define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_slt_select_i16:
+; CHECK: ceqh
+; CHECK: cgth
+; CHECK: nor
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp slt i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -296,12 +479,26 @@ entry:
}
define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_slt_setcc_i16:
+; CHECK: ceqh
+; CHECK: cgth
+; CHECK: ilhu
+; CHECK: nor
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp slt i16 %arg1, %arg2
ret i1 %A
}
define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_slt_immed01_i16:
+; CHECK: ceqhi
+; CHECK: cgthi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i16 %arg1, 511
%B = select i1 %A, i16 %val1, i16 %val2
@@ -309,6 +506,12 @@ entry:
}
define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_slt_immed02_i16:
+; CHECK: ceqhi
+; CHECK: cgthi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i16 %arg1, -512
%B = select i1 %A, i16 %val1, i16 %val2
@@ -316,6 +519,12 @@ entry:
}
define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_slt_immed03_i16:
+; CHECK: ceqhi
+; CHECK: cgthi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i16 %arg1, -1
%B = select i1 %A, i16 %val1, i16 %val2
@@ -323,6 +532,10 @@ entry:
}
define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_slt_immed04_i16:
+; CHECK: lr
+; CHECK-NEXT: bi
+
entry:
%A = icmp slt i16 %arg1, 32768
%B = select i1 %A, i16 %val1, i16 %val2
@@ -330,6 +543,10 @@ entry:
}
define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sle_select_i16:
+; CHECK: cgth
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp sle i16 %arg1, %arg2
%B = select i1 %A, i16 %val1, i16 %val2
@@ -337,6 +554,13 @@ entry:
}
define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
+; CHECK: icmp_sle_setcc_i16:
+; CHECK: cgth
+; CHECK: ilhu
+; CHECK: xorhi
+; CHECK: iohl
+; CHECK: bi
+
entry:
%A = icmp sle i16 %arg1, %arg2
ret i1 %A
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
index ccbb5f7cde58..1794f4cd7b66 100644
--- a/test/CodeGen/CellSPU/icmp32.ll
+++ b/test/CodeGen/CellSPU/icmp32.ll
@@ -1,14 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ila %t1.s | count 6
-; RUN: grep ceq %t1.s | count 28
-; RUN: grep ceqi %t1.s | count 12
-; RUN: grep clgt %t1.s | count 16
-; RUN: grep clgti %t1.s | count 6
-; RUN: grep cgt %t1.s | count 16
-; RUN: grep cgti %t1.s | count 6
-; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 20
+; RUN: llc < %s -march=cellspu | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -27,6 +17,10 @@ target triple = "spu"
; i32 integer comparisons:
define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_eq_select_i32:
+; CHECK: ceq
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp eq i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -34,12 +28,22 @@ entry:
}
define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_eq_setcc_i32:
+; CHECK: ilhu
+; CHECK: ceq
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp eq i32 %arg1, %arg2
ret i1 %A
}
define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_eq_immed01_i32:
+; CHECK: ceqi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i32 %arg1, 511
%B = select i1 %A, i32 %val1, i32 %val2
@@ -47,6 +51,10 @@ entry:
}
define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_eq_immed02_i32:
+; CHECK: ceqi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i32 %arg1, -512
%B = select i1 %A, i32 %val1, i32 %val2
@@ -54,6 +62,10 @@ entry:
}
define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_eq_immed03_i32:
+; CHECK: ceqi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i32 %arg1, -1
%B = select i1 %A, i32 %val1, i32 %val2
@@ -61,6 +73,11 @@ entry:
}
define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_eq_immed04_i32:
+; CHECK: ila
+; CHECK: ceq
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i32 %arg1, 32768
%B = select i1 %A, i32 %val1, i32 %val2
@@ -68,6 +85,10 @@ entry:
}
define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ne_select_i32:
+; CHECK: ceq
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp ne i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -75,12 +96,23 @@ entry:
}
define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ne_setcc_i32:
+; CHECK: ceq
+; CHECK: ilhu
+; CHECK: xori
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ne i32 %arg1, %arg2
ret i1 %A
}
define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ne_immed01_i32:
+; CHECK: ceqi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i32 %arg1, 511
%B = select i1 %A, i32 %val1, i32 %val2
@@ -88,6 +120,10 @@ entry:
}
define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ne_immed02_i32:
+; CHECK: ceqi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i32 %arg1, -512
%B = select i1 %A, i32 %val1, i32 %val2
@@ -95,6 +131,10 @@ entry:
}
define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ne_immed03_i32:
+; CHECK: ceqi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i32 %arg1, -1
%B = select i1 %A, i32 %val1, i32 %val2
@@ -102,6 +142,11 @@ entry:
}
define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ne_immed04_i32:
+; CHECK: ila
+; CHECK: ceq
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i32 %arg1, 32768
%B = select i1 %A, i32 %val1, i32 %val2
@@ -109,6 +154,10 @@ entry:
}
define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ugt_select_i32:
+; CHECK: clgt
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp ugt i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -116,12 +165,22 @@ entry:
}
define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ugt_setcc_i32:
+; CHECK: ilhu
+; CHECK: clgt
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ugt i32 %arg1, %arg2
ret i1 %A
}
define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ugt_immed01_i32:
+; CHECK: clgti
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i32 %arg1, 511
%B = select i1 %A, i32 %val1, i32 %val2
@@ -129,6 +188,10 @@ entry:
}
define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ugt_immed02_i32:
+; CHECK: clgti
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i32 %arg1, 4294966784
%B = select i1 %A, i32 %val1, i32 %val2
@@ -136,6 +199,10 @@ entry:
}
define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ugt_immed03_i32:
+; CHECK: clgti
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i32 %arg1, 4294967293
%B = select i1 %A, i32 %val1, i32 %val2
@@ -143,6 +210,11 @@ entry:
}
define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ugt_immed04_i32:
+; CHECK: ila
+; CHECK: clgt
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i32 %arg1, 32768
%B = select i1 %A, i32 %val1, i32 %val2
@@ -150,6 +222,12 @@ entry:
}
define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_uge_select_i32:
+; CHECK: ceq
+; CHECK: clgt
+; CHECK: or
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp uge i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -157,6 +235,14 @@ entry:
}
define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_uge_setcc_i32:
+; CHECK: ceq
+; CHECK: clgt
+; CHECK: ilhu
+; CHECK: or
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp uge i32 %arg1, %arg2
ret i1 %A
@@ -169,6 +255,12 @@ entry:
;; they'll ever be generated.
define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ult_select_i32:
+; CHECK: ceq
+; CHECK: clgt
+; CHECK: nor
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp ult i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -176,12 +268,26 @@ entry:
}
define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ult_setcc_i32:
+; CHECK: ceq
+; CHECK: clgt
+; CHECK: ilhu
+; CHECK: nor
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ult i32 %arg1, %arg2
ret i1 %A
}
define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ult_immed01_i32:
+; CHECK: ceqi
+; CHECK: clgti
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i32 %arg1, 511
%B = select i1 %A, i32 %val1, i32 %val2
@@ -189,6 +295,12 @@ entry:
}
define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ult_immed02_i32:
+; CHECK: ceqi
+; CHECK: clgti
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i32 %arg1, 4294966784
%B = select i1 %A, i32 %val1, i32 %val2
@@ -196,6 +308,12 @@ entry:
}
define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ult_immed03_i32:
+; CHECK: ceqi
+; CHECK: clgti
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i32 %arg1, 4294967293
%B = select i1 %A, i32 %val1, i32 %val2
@@ -203,6 +321,11 @@ entry:
}
define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ult_immed04_i32:
+; CHECK: rotmi
+; CHECK: ceqi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i32 %arg1, 32768
%B = select i1 %A, i32 %val1, i32 %val2
@@ -210,6 +333,10 @@ entry:
}
define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ule_select_i32:
+; CHECK: clgt
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp ule i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -217,6 +344,13 @@ entry:
}
define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_ule_setcc_i32:
+; CHECK: clgt
+; CHECK: ilhu
+; CHECK: xori
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp ule i32 %arg1, %arg2
ret i1 %A
@@ -229,6 +363,10 @@ entry:
;; they'll ever be generated.
define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sgt_select_i32:
+; CHECK: cgt
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp sgt i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -236,12 +374,22 @@ entry:
}
define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sgt_setcc_i32:
+; CHECK: ilhu
+; CHECK: cgt
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp sgt i32 %arg1, %arg2
ret i1 %A
}
define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sgt_immed01_i32:
+; CHECK: cgti
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i32 %arg1, 511
%B = select i1 %A, i32 %val1, i32 %val2
@@ -249,6 +397,10 @@ entry:
}
define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sgt_immed02_i32:
+; CHECK: cgti
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i32 %arg1, 4294966784
%B = select i1 %A, i32 %val1, i32 %val2
@@ -256,6 +408,10 @@ entry:
}
define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sgt_immed03_i32:
+; CHECK: cgti
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i32 %arg1, 4294967293
%B = select i1 %A, i32 %val1, i32 %val2
@@ -263,6 +419,11 @@ entry:
}
define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sgt_immed04_i32:
+; CHECK: ila
+; CHECK: cgt
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i32 %arg1, 32768
%B = select i1 %A, i32 %val1, i32 %val2
@@ -270,6 +431,12 @@ entry:
}
define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sge_select_i32:
+; CHECK: ceq
+; CHECK: cgt
+; CHECK: or
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp sge i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -277,6 +444,14 @@ entry:
}
define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sge_setcc_i32:
+; CHECK: ceq
+; CHECK: cgt
+; CHECK: ilhu
+; CHECK: or
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp sge i32 %arg1, %arg2
ret i1 %A
@@ -289,6 +464,12 @@ entry:
;; they'll ever be generated.
define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_slt_select_i32:
+; CHECK: ceq
+; CHECK: cgt
+; CHECK: nor
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp slt i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -296,12 +477,26 @@ entry:
}
define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_slt_setcc_i32:
+; CHECK: ceq
+; CHECK: cgt
+; CHECK: ilhu
+; CHECK: nor
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp slt i32 %arg1, %arg2
ret i1 %A
}
define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_slt_immed01_i32:
+; CHECK: ceqi
+; CHECK: cgti
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i32 %arg1, 511
%B = select i1 %A, i32 %val1, i32 %val2
@@ -309,6 +504,12 @@ entry:
}
define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_slt_immed02_i32:
+; CHECK: ceqi
+; CHECK: cgti
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i32 %arg1, -512
%B = select i1 %A, i32 %val1, i32 %val2
@@ -316,6 +517,12 @@ entry:
}
define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_slt_immed03_i32:
+; CHECK: ceqi
+; CHECK: cgti
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i32 %arg1, -1
%B = select i1 %A, i32 %val1, i32 %val2
@@ -323,6 +530,13 @@ entry:
}
define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_slt_immed04_i32:
+; CHECK: ila
+; CHECK: ceq
+; CHECK: cgt
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i32 %arg1, 32768
%B = select i1 %A, i32 %val1, i32 %val2
@@ -330,6 +544,10 @@ entry:
}
define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sle_select_i32:
+; CHECK: cgt
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp sle i32 %arg1, %arg2
%B = select i1 %A, i32 %val1, i32 %val2
@@ -337,6 +555,13 @@ entry:
}
define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
+; CHECK: icmp_sle_setcc_i32:
+; CHECK: cgt
+; CHECK: ilhu
+; CHECK: xori
+; CHECK: iohl
+; CHECK: shufb
+
entry:
%A = icmp sle i32 %arg1, %arg2
ret i1 %A
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
index 5517d104ab9f..1db641e5a853 100644
--- a/test/CodeGen/CellSPU/icmp8.ll
+++ b/test/CodeGen/CellSPU/icmp8.ll
@@ -1,13 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ceqb %t1.s | count 24
-; RUN: grep ceqbi %t1.s | count 12
-; RUN: grep clgtb %t1.s | count 11
-; RUN: grep cgtb %t1.s | count 13
-; RUN: grep cgtbi %t1.s | count 5
-; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3
-; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 11
-; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 4
+; RUN: llc < %s -march=cellspu | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -26,6 +17,10 @@ target triple = "spu"
; i8 integer comparisons:
define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_eq_select_i8:
+; CHECK: ceqb
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp eq i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -33,12 +28,20 @@ entry:
}
define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_eq_setcc_i8:
+; CHECK: ceqb
+; CHECK-NEXT: bi
+
entry:
%A = icmp eq i8 %arg1, %arg2
ret i1 %A
}
define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_eq_immed01_i8:
+; CHECK: ceqbi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i8 %arg1, 127
%B = select i1 %A, i8 %val1, i8 %val2
@@ -46,6 +49,10 @@ entry:
}
define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_eq_immed02_i8:
+; CHECK: ceqbi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i8 %arg1, -128
%B = select i1 %A, i8 %val1, i8 %val2
@@ -53,6 +60,10 @@ entry:
}
define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_eq_immed03_i8:
+; CHECK: ceqbi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp eq i8 %arg1, -1
%B = select i1 %A, i8 %val1, i8 %val2
@@ -60,6 +71,10 @@ entry:
}
define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ne_select_i8:
+; CHECK: ceqb
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp ne i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -67,12 +82,21 @@ entry:
}
define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ne_setcc_i8:
+; CHECK: ceqb
+; CHECK: xorbi
+; CHECK-NEXT: bi
+
entry:
%A = icmp ne i8 %arg1, %arg2
ret i1 %A
}
define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ne_immed01_i8:
+; CHECK: ceqbi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i8 %arg1, 127
%B = select i1 %A, i8 %val1, i8 %val2
@@ -80,6 +104,10 @@ entry:
}
define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ne_immed02_i8:
+; CHECK: ceqbi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i8 %arg1, -128
%B = select i1 %A, i8 %val1, i8 %val2
@@ -87,6 +115,10 @@ entry:
}
define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ne_immed03_i8:
+; CHECK: ceqbi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp ne i8 %arg1, -1
%B = select i1 %A, i8 %val1, i8 %val2
@@ -94,6 +126,10 @@ entry:
}
define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ugt_select_i8:
+; CHECK: clgtb
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp ugt i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -101,12 +137,20 @@ entry:
}
define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ugt_setcc_i8:
+; CHECK: clgtb
+; CHECK-NEXT: bi
+
entry:
%A = icmp ugt i8 %arg1, %arg2
ret i1 %A
}
define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ugt_immed01_i8:
+; CHECK: clgtbi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ugt i8 %arg1, 126
%B = select i1 %A, i8 %val1, i8 %val2
@@ -114,6 +158,12 @@ entry:
}
define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_uge_select_i8:
+; CHECK: ceqb
+; CHECK: clgtb
+; CHECK: or
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp uge i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -121,6 +171,12 @@ entry:
}
define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_uge_setcc_i8:
+; CHECK: ceqb
+; CHECK: clgtb
+; CHECK: or
+; CHECK-NEXT: bi
+
entry:
%A = icmp uge i8 %arg1, %arg2
ret i1 %A
@@ -133,6 +189,12 @@ entry:
;; they'll ever be generated.
define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ult_select_i8:
+; CHECK: ceqb
+; CHECK: clgtb
+; CHECK: nor
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp ult i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -140,12 +202,24 @@ entry:
}
define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ult_setcc_i8:
+; CHECK: ceqb
+; CHECK: clgtb
+; CHECK: nor
+; CHECK-NEXT: bi
+
entry:
%A = icmp ult i8 %arg1, %arg2
ret i1 %A
}
define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ult_immed01_i8:
+; CHECK: ceqbi
+; CHECK: clgtbi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i8 %arg1, 253
%B = select i1 %A, i8 %val1, i8 %val2
@@ -153,6 +227,12 @@ entry:
}
define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ult_immed02_i8:
+; CHECK: ceqbi
+; CHECK: clgtbi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp ult i8 %arg1, 129
%B = select i1 %A, i8 %val1, i8 %val2
@@ -160,6 +240,10 @@ entry:
}
define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ule_select_i8:
+; CHECK: clgtb
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp ule i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -167,6 +251,11 @@ entry:
}
define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_ule_setcc_i8:
+; CHECK: clgtb
+; CHECK: xorbi
+; CHECK-NEXT: bi
+
entry:
%A = icmp ule i8 %arg1, %arg2
ret i1 %A
@@ -179,6 +268,10 @@ entry:
;; they'll ever be generated.
define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sgt_select_i8:
+; CHECK: cgtb
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp sgt i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -186,12 +279,20 @@ entry:
}
define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sgt_setcc_i8:
+; CHECK: cgtb
+; CHECK-NEXT: bi
+
entry:
%A = icmp sgt i8 %arg1, %arg2
ret i1 %A
}
define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sgt_immed01_i8:
+; CHECK: cgtbi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i8 %arg1, 96
%B = select i1 %A, i8 %val1, i8 %val2
@@ -199,6 +300,10 @@ entry:
}
define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sgt_immed02_i8:
+; CHECK: cgtbi
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp sgt i8 %arg1, -1
%B = select i1 %A, i8 %val1, i8 %val2
@@ -206,6 +311,10 @@ entry:
}
define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sgt_immed03_i8:
+; CHECK: ceqbi
+; CHECK: selb $3, $4, $5, $3
+
entry:
%A = icmp sgt i8 %arg1, -128
%B = select i1 %A, i8 %val1, i8 %val2
@@ -213,6 +322,12 @@ entry:
}
define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sge_select_i8:
+; CHECK: ceqb
+; CHECK: cgtb
+; CHECK: or
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp sge i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -220,6 +335,12 @@ entry:
}
define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sge_setcc_i8:
+; CHECK: ceqb
+; CHECK: cgtb
+; CHECK: or
+; CHECK-NEXT: bi
+
entry:
%A = icmp sge i8 %arg1, %arg2
ret i1 %A
@@ -232,6 +353,12 @@ entry:
;; they'll ever be generated.
define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_slt_select_i8:
+; CHECK: ceqb
+; CHECK: cgtb
+; CHECK: nor
+; CHECK: selb $3, $6, $5, $3
+
entry:
%A = icmp slt i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -239,12 +366,24 @@ entry:
}
define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_slt_setcc_i8:
+; CHECK: ceqb
+; CHECK: cgtb
+; CHECK: nor
+; CHECK-NEXT: bi
+
entry:
%A = icmp slt i8 %arg1, %arg2
ret i1 %A
}
define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_slt_immed01_i8:
+; CHECK: ceqbi
+; CHECK: cgtbi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i8 %arg1, 96
%B = select i1 %A, i8 %val1, i8 %val2
@@ -252,6 +391,12 @@ entry:
}
define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_slt_immed02_i8:
+; CHECK: ceqbi
+; CHECK: cgtbi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i8 %arg1, -120
%B = select i1 %A, i8 %val1, i8 %val2
@@ -259,6 +404,12 @@ entry:
}
define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_slt_immed03_i8:
+; CHECK: ceqbi
+; CHECK: cgtbi
+; CHECK: nor
+; CHECK: selb $3, $5, $4, $3
+
entry:
%A = icmp slt i8 %arg1, -1
%B = select i1 %A, i8 %val1, i8 %val2
@@ -266,6 +417,10 @@ entry:
}
define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sle_select_i8:
+; CHECK: cgtb
+; CHECK: selb $3, $5, $6, $3
+
entry:
%A = icmp sle i8 %arg1, %arg2
%B = select i1 %A, i8 %val1, i8 %val2
@@ -273,6 +428,11 @@ entry:
}
define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
+; CHECK: icmp_sle_setcc_i8:
+; CHECK: cgtb
+; CHECK: xorbi
+; CHECK-NEXT: bi
+
entry:
%A = icmp sle i8 %arg1, %arg2
ret i1 %A
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index f4aad44ed650..1ccc356dcf5a 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,20 +1,20 @@
; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep {shlh } %t1.s | count 10
-; RUN: grep {shlhi } %t1.s | count 3
-; RUN: grep {shl } %t1.s | count 10
-; RUN: grep {shli } %t1.s | count 3
-; RUN: grep {xshw } %t1.s | count 5
-; RUN: grep {and } %t1.s | count 15
-; RUN: grep {andi } %t1.s | count 4
-; RUN: grep {rotmi } %t1.s | count 4
-; RUN: grep {rotqmbyi } %t1.s | count 1
-; RUN: grep {rotqmbii } %t1.s | count 2
-; RUN: grep {rotqmby } %t1.s | count 1
-; RUN: grep {rotqmbi } %t1.s | count 2
-; RUN: grep {rotqbyi } %t1.s | count 1
-; RUN: grep {rotqbii } %t1.s | count 2
-; RUN: grep {rotqbybi } %t1.s | count 1
-; RUN: grep {sfi } %t1.s | count 6
+; RUN: grep "shlh " %t1.s | count 10
+; RUN: grep "shlhi " %t1.s | count 3
+; RUN: grep "shl " %t1.s | count 10
+; RUN: grep "shli " %t1.s | count 3
+; RUN: grep "xshw " %t1.s | count 5
+; RUN: grep "and " %t1.s | count 15
+; RUN: grep "andi " %t1.s | count 4
+; RUN: grep "rotmi " %t1.s | count 4
+; RUN: grep "rotqmbyi " %t1.s | count 1
+; RUN: grep "rotqmbii " %t1.s | count 2
+; RUN: grep "rotqmby " %t1.s | count 1
+; RUN: grep "rotqmbi " %t1.s | count 2
+; RUN: grep "rotqbyi " %t1.s | count 1
+; RUN: grep "rotqbii " %t1.s | count 2
+; RUN: grep "rotqbybi " %t1.s | count 1
+; RUN: grep "sfi " %t1.s | count 6
; RUN: cat %t1.s | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index 6ca5b0892304..43f8776a3d46 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep {stqd.*0(\$3)} %t1.s | count 4
-; RUN: grep {stqd.*16(\$3)} %t1.s | count 4
+; RUN: grep 'stqd.*0($3)' %t1.s | count 4
+; RUN: grep 'stqd.*16($3)' %t1.s | count 4
; RUN: grep 16256 %t1.s | count 2
; RUN: grep 16384 %t1.s | count 1
; RUN: grep 771 %t1.s | count 4
@@ -8,7 +8,7 @@
; RUN: grep 1799 %t1.s | count 2
; RUN: grep 1543 %t1.s | count 5
; RUN: grep 1029 %t1.s | count 3
-; RUN: grep {shli.*, 4} %t1.s | count 4
+; RUN: grep 'shli.*, 4' %t1.s | count 4
; RUN: grep stqx %t1.s | count 4
; RUN: grep ilhu %t1.s | count 11
; RUN: grep iohl %t1.s | count 8
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
index d16185238af0..e4c8fb49a32c 100644
--- a/test/CodeGen/CellSPU/trunc.ll
+++ b/test/CodeGen/CellSPU/trunc.ll
@@ -1,19 +1,19 @@
; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep shufb %t1.s | count 19
-; RUN: grep {ilhu.*1799} %t1.s | count 1
-; RUN: grep {ilhu.*771} %t1.s | count 2
-; RUN: grep {ilhu.*1543} %t1.s | count 1
-; RUN: grep {ilhu.*1029} %t1.s | count 1
-; RUN: grep {ilhu.*515} %t1.s | count 1
-; RUN: grep {ilhu.*3855} %t1.s | count 1
-; RUN: grep {ilhu.*3599} %t1.s | count 1
-; RUN: grep {ilhu.*3085} %t1.s | count 1
-; RUN: grep {iohl.*3855} %t1.s | count 1
-; RUN: grep {iohl.*3599} %t1.s | count 2
-; RUN: grep {iohl.*1543} %t1.s | count 2
-; RUN: grep {iohl.*771} %t1.s | count 2
-; RUN: grep {iohl.*515} %t1.s | count 1
-; RUN: grep {iohl.*1799} %t1.s | count 1
+; RUN: grep "ilhu.*1799" %t1.s | count 1
+; RUN: grep "ilhu.*771" %t1.s | count 2
+; RUN: grep "ilhu.*1543" %t1.s | count 1
+; RUN: grep "ilhu.*1029" %t1.s | count 1
+; RUN: grep "ilhu.*515" %t1.s | count 1
+; RUN: grep "ilhu.*3855" %t1.s | count 1
+; RUN: grep "ilhu.*3599" %t1.s | count 1
+; RUN: grep "ilhu.*3085" %t1.s | count 1
+; RUN: grep "iohl.*3855" %t1.s | count 1
+; RUN: grep "iohl.*3599" %t1.s | count 2
+; RUN: grep "iohl.*1543" %t1.s | count 2
+; RUN: grep "iohl.*771" %t1.s | count 2
+; RUN: grep "iohl.*515" %t1.s | count 1
+; RUN: grep "iohl.*1799" %t1.s | count 1
; RUN: grep lqa %t1.s | count 1
; RUN: grep cbd %t1.s | count 4
; RUN: grep chd %t1.s | count 3
diff --git a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index 928edc4f4786..2dc5c162cd96 100644
--- a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -regalloc=fast
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0
%struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 }
@search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=2]
diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
deleted file mode 100644
index ad418f7cfb0d..000000000000
--- a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s
-; PR4317
-
-declare i32 @b()
-
-define void @a() {
-entry:
- ret void
-
-dummy:
- invoke i32 @b() to label %reg unwind label %reg
-
-reg:
- %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
- catch i8* null
- ret void
-}
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/Generic/2012-06-08-APIntCrash.ll b/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
new file mode 100644
index 000000000000..2c096bf42182
--- /dev/null
+++ b/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s
+
+define void @test1(<8 x i32>* %ptr)
+{
+ %1 = load <8 x i32>* %ptr, align 32
+ %2 = and <8 x i32> %1, <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 -1>
+ store <8 x i32> %2, <8 x i32>* %ptr, align 16
+ ret void
+}
diff --git a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll b/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll
new file mode 100644
index 000000000000..6591c64d871e
--- /dev/null
+++ b/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll
@@ -0,0 +1,8 @@
+; RUN: llc -mcpu=corei7 < %s
+; We don't care about the output, just that it doesn't crash
+
+define <1 x i1> @buildvec_promote() {
+ %cmp = icmp ule <1 x i32> undef, undef
+ %sel = select i1 undef, <1 x i1> undef, <1 x i1> %cmp
+ ret <1 x i1> %sel
+}
diff --git a/test/CodeGen/Generic/asm-large-immediate.ll b/test/CodeGen/Generic/asm-large-immediate.ll
index 605665bef6d1..891bbc9cc16d 100644
--- a/test/CodeGen/Generic/asm-large-immediate.ll
+++ b/test/CodeGen/Generic/asm-large-immediate.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s | grep 68719476738
+; RUN: llc < %s | FileCheck %s
define void @test() {
entry:
+; CHECK: /* result: 68719476738 */
tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
+; CHECK: /* result: -68719476738 */
+ tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
ret void
}
-
diff --git a/test/CodeGen/Generic/donothing.ll b/test/CodeGen/Generic/donothing.ll
new file mode 100644
index 000000000000..d6ba138fc6da
--- /dev/null
+++ b/test/CodeGen/Generic/donothing.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare void @llvm.donothing() readnone
+
+; CHECK: f1
+define void @f1() nounwind uwtable ssp {
+entry:
+; CHECK-NOT: donothing
+ invoke void @llvm.donothing()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret void
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ filter [0 x i8*] zeroinitializer
+ %1 = extractvalue { i8*, i32 } %0, 0
+ tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind
+ unreachable
+}
+
+; CHECK: f2
+define void @f2() nounwind {
+entry:
+; CHECK-NOT: donothing
+ call void @llvm.donothing()
+ ret void
+}
diff --git a/test/CodeGen/Generic/edge-bundles-blockIDs.ll b/test/CodeGen/Generic/edge-bundles-blockIDs.ll
index b4ae415b5013..d86c75838e53 100644
--- a/test/CodeGen/Generic/edge-bundles-blockIDs.ll
+++ b/test/CodeGen/Generic/edge-bundles-blockIDs.ll
@@ -1,6 +1,6 @@
; Make sure EdgeBundles handles the case when the function size is less than
; the number of block IDs.
-; RUN: llc -regalloc=fast < %s
+; RUN: llc -regalloc=fast -optimize-regalloc=0 < %s
define void @foo() nounwind {
entry:
diff --git a/test/CodeGen/Generic/print-after.ll b/test/CodeGen/Generic/print-after.ll
new file mode 100644
index 000000000000..7505907ef773
--- /dev/null
+++ b/test/CodeGen/Generic/print-after.ll
@@ -0,0 +1,6 @@
+; RUN: not llc --help-hidden 2>&1 | FileCheck %s
+
+; CHECK: -print-after
+; CHECK-NOT: -print-after-all
+; CHECK: =simple-register-coalescing
+; CHECK: -print-after-all
diff --git a/test/CodeGen/Generic/print-machineinstrs.ll b/test/CodeGen/Generic/print-machineinstrs.ll
new file mode 100644
index 000000000000..75dceb5b2623
--- /dev/null
+++ b/test/CodeGen/Generic/print-machineinstrs.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs=branch-folder -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs= -o /dev/null 2>&1 | FileCheck %s
+
+define i64 @foo(i64 %a, i64 %b) nounwind {
+; CHECK: -branch-folder -print-machineinstrs
+; CHECK: Control Flow Optimizer
+; CHECK-NEXT: MachineFunction Printer
+; CHECK: Machine code for function foo:
+ %c = add i64 %a, %b
+ %d = trunc i64 %c to i32
+ %e = zext i32 %d to i64
+ ret i64 %e
+}
diff --git a/test/CodeGen/Generic/stop-after.ll b/test/CodeGen/Generic/stop-after.ll
new file mode 100644
index 000000000000..557e097840af
--- /dev/null
+++ b/test/CodeGen/Generic/stop-after.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -debug-pass=Structure -stop-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP
+; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START
+
+; STOP: -loop-reduce -print-module
+; STOP: Loop Strength Reduction
+; STOP-NEXT: Machine Function Analysis
+
+; START: -machine-branch-prob -gc-lowering
+; START: FunctionPass Manager
+; START-NEXT: Lower Garbage Collection Instructions
diff --git a/test/CodeGen/Generic/undef-phi.ll b/test/CodeGen/Generic/undef-phi.ll
new file mode 100644
index 000000000000..10899f9fa2db
--- /dev/null
+++ b/test/CodeGen/Generic/undef-phi.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -verify-machineinstrs -verify-coalescing
+;
+; This function has a PHI with one undefined input. Verify that PHIElimination
+; inserts an IMPLICIT_DEF instruction in the predecessor so all paths to the use
+; pass through a def.
+
+%struct.xx_stack = type { i32, %struct.xx_stack* }
+
+define i32 @push(%struct.xx_stack* %stack) nounwind uwtable readonly ssp {
+entry:
+ %tobool1 = icmp eq %struct.xx_stack* %stack, null
+ br i1 %tobool1, label %for.end, label %for.body
+
+for.body:
+ %stack.addr.02 = phi %struct.xx_stack* [ %0, %for.body ], [ %stack, %entry ]
+ %next = getelementptr inbounds %struct.xx_stack* %stack.addr.02, i64 0, i32 1
+ %0 = load %struct.xx_stack** %next, align 8
+ %tobool = icmp eq %struct.xx_stack* %0, null
+ br i1 %tobool, label %for.end, label %for.body
+
+for.end:
+ %top.0.lcssa = phi %struct.xx_stack* [ undef, %entry ], [ %stack.addr.02, %for.body ]
+ %first = getelementptr inbounds %struct.xx_stack* %top.0.lcssa, i64 0, i32 0
+ %1 = load i32* %first, align 4
+ ret i32 %1
+}
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index 69002e0abcb1..e9ac8b67493e 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
; CHECK: r[[T0:[0-9]+]] = #7
; CHECK: memw(r29 + #0) = r[[T0]]
; CHECK: r0 = #1
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
index 36abd74d762b..721998596c81 100644
--- a/test/CodeGen/Hexagon/combine.ll
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
@j = external global i32
diff --git a/test/CodeGen/Hexagon/convertdptoint.ll b/test/CodeGen/Hexagon/convertdptoint.ll
new file mode 100644
index 000000000000..fa068c4c8a51
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertdptoint.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate conversion from double precision floating point
+; to 32-bit int value in IEEE compliant mode in V5.
+
+; CHECK: r{{[0-9]+}} = convert_df2w(r{{[0-9]+}}:{{[0-9]+}}):chop
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %c = alloca double, align 8
+ store i32 0, i32* %retval
+ store double 1.540000e+01, double* %a, align 8
+ store double 9.100000e+00, double* %b, align 8
+ %0 = load double* %a, align 8
+ %1 = load double* %b, align 8
+ %add = fadd double %0, %1
+ store double %add, double* %c, align 8
+ %2 = load double* %c, align 8
+ %conv = fptosi double %2 to i32
+ store i32 %conv, i32* %i, align 4
+ %3 = load i32* %i, align 4
+ ret i32 %3
+}
diff --git a/test/CodeGen/Hexagon/convertdptoll.ll b/test/CodeGen/Hexagon/convertdptoll.ll
new file mode 100644
index 000000000000..1b4dd86bd01b
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertdptoll.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate conversion from double precision floating point
+; to 64-bit integer value in IEEE compliant mode in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = convert_df2d(r{{[0-9]+}}:{{[0-9]+}}):chop
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i64, align 8
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %c = alloca double, align 8
+ store i32 0, i32* %retval
+ store double 1.540000e+01, double* %a, align 8
+ store double 9.100000e+00, double* %b, align 8
+ %0 = load double* %a, align 8
+ %1 = load double* %b, align 8
+ %add = fadd double %0, %1
+ store double %add, double* %c, align 8
+ %2 = load double* %c, align 8
+ %conv = fptosi double %2 to i64
+ store i64 %conv, i64* %i, align 8
+ %3 = load i64* %i, align 8
+ %conv1 = trunc i64 %3 to i32
+ ret i32 %conv1
+}
diff --git a/test/CodeGen/Hexagon/convertsptoint.ll b/test/CodeGen/Hexagon/convertsptoint.ll
new file mode 100644
index 000000000000..b8a9d6c8083c
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertsptoint.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate conversion from single precision floating point
+; to 32-bit int value in IEEE compliant mode in V5.
+
+; CHECK: r{{[0-9]+}} = convert_sf2w(r{{[0-9]+}}):chop
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ %a = alloca float, align 4
+ %b = alloca float, align 4
+ %c = alloca float, align 4
+ store i32 0, i32* %retval
+ store float 0x402ECCCCC0000000, float* %a, align 4
+ store float 0x4022333340000000, float* %b, align 4
+ %0 = load float* %a, align 4
+ %1 = load float* %b, align 4
+ %add = fadd float %0, %1
+ store float %add, float* %c, align 4
+ %2 = load float* %c, align 4
+ %conv = fptosi float %2 to i32
+ store i32 %conv, i32* %i, align 4
+ %3 = load i32* %i, align 4
+ ret i32 %3
+}
diff --git a/test/CodeGen/Hexagon/convertsptoll.ll b/test/CodeGen/Hexagon/convertsptoll.ll
new file mode 100644
index 000000000000..1c4df94784aa
--- /dev/null
+++ b/test/CodeGen/Hexagon/convertsptoll.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate conversion from single precision floating point
+; to 64-bit int value in IEEE compliant mode in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = convert_sf2d(r{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i64, align 8
+ %a = alloca float, align 4
+ %b = alloca float, align 4
+ %c = alloca float, align 4
+ store i32 0, i32* %retval
+ store float 0x402ECCCCC0000000, float* %a, align 4
+ store float 0x4022333340000000, float* %b, align 4
+ %0 = load float* %a, align 4
+ %1 = load float* %b, align 4
+ %add = fadd float %0, %1
+ store float %add, float* %c, align 4
+ %2 = load float* %c, align 4
+ %conv = fptosi float %2 to i64
+ store i64 %conv, i64* %i, align 8
+ %3 = load i64* %i, align 8
+ %conv1 = trunc i64 %3 to i32
+ ret i32 %conv1
+}
diff --git a/test/CodeGen/Hexagon/dadd.ll b/test/CodeGen/Hexagon/dadd.ll
new file mode 100644
index 000000000000..602978ac01d3
--- /dev/null
+++ b/test/CodeGen/Hexagon/dadd.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate double precision floating point add in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfadd(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+
+
+define i32 @main() nounwind {
+entry:
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %c = alloca double, align 8
+ store double 1.540000e+01, double* %a, align 8
+ store double 9.100000e+00, double* %b, align 8
+ %0 = load double* %a, align 8
+ %1 = load double* %b, align 8
+ %add = fadd double %0, %1
+ store double %add, double* %c, align 8
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/dmul.ll b/test/CodeGen/Hexagon/dmul.ll
new file mode 100644
index 000000000000..d7437739ee90
--- /dev/null
+++ b/test/CodeGen/Hexagon/dmul.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate double precision floating point multiply in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfmpy(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %c = alloca double, align 8
+ store double 1.540000e+01, double* %a, align 8
+ store double 9.100000e+00, double* %b, align 8
+ %0 = load double* %b, align 8
+ %1 = load double* %a, align 8
+ %mul = fmul double %0, %1
+ store double %mul, double* %c, align 8
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
index 04c2ec157eca..c3b6f378ec8a 100644
--- a/test/CodeGen/Hexagon/double.ll
+++ b/test/CodeGen/Hexagon/double.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: __hexagon_adddf3
; CHECK: __hexagon_subdf3
diff --git a/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll b/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
new file mode 100644
index 000000000000..54e7ce3bcdd3
--- /dev/null
+++ b/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-hexagon-ieee-rnd-near < %s | FileCheck %s
+; Check that we generate conversion from double precision floating point
+; to 32-bit int value in IEEE rounding to the nearest mode in V5.
+
+; CHECK: r{{[0-9]+}} = convert_df2w(r{{[0-9]+}}:{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %c = alloca double, align 8
+ store i32 0, i32* %retval
+ store double 1.540000e+01, double* %a, align 8
+ store double 9.100000e+00, double* %b, align 8
+ %0 = load double* %a, align 8
+ %1 = load double* %b, align 8
+ %add = fadd double %0, %1
+ store double %add, double* %c, align 8
+ %2 = load double* %c, align 8
+ %conv = fptosi double %2 to i32
+ store i32 %conv, i32* %i, align 4
+ %3 = load i32* %i, align 4
+ ret i32 %3
+}
diff --git a/test/CodeGen/Hexagon/dsub.ll b/test/CodeGen/Hexagon/dsub.ll
new file mode 100644
index 000000000000..4f9d39ed0b24
--- /dev/null
+++ b/test/CodeGen/Hexagon/dsub.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate double precision floating point subtract in V5.
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfsub(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %c = alloca double, align 8
+ store double 1.540000e+01, double* %a, align 8
+ store double 9.100000e+00, double* %b, align 8
+ %0 = load double* %b, align 8
+ %1 = load double* %a, align 8
+ %sub = fsub double %0, %1
+ store double %sub, double* %c, align 8
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
new file mode 100644
index 000000000000..9b27dda52c1d
--- /dev/null
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate dual stores in one packet in V4
+
+; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
+; CHECK-NEXT: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
+; CHECK-NEXT: }
+
+@Reg = global i32 0, align 4
+define i32 @main() nounwind {
+entry:
+ %number= alloca i32, align 4
+ store i32 500000, i32* %number, align 4
+ %number1= alloca i32, align 4
+ store i32 100000, i32* %number1, align 4
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Hexagon/fadd.ll b/test/CodeGen/Hexagon/fadd.ll
new file mode 100644
index 000000000000..b95e1475ff73
--- /dev/null
+++ b/test/CodeGen/Hexagon/fadd.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate sp floating point add in V5.
+
+; CHECK: r{{[0-9]+}} = sfadd(r{{[0-9]+}}, r{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+ %a = alloca float, align 4
+ %b = alloca float, align 4
+ %c = alloca float, align 4
+ store float 0x402ECCCCC0000000, float* %a, align 4
+ store float 0x4022333340000000, float* %b, align 4
+ %0 = load float* %a, align 4
+ %1 = load float* %b, align 4
+ %add = fadd float %0, %1
+ store float %add, float* %c, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/fcmp.ll b/test/CodeGen/Hexagon/fcmp.ll
new file mode 100644
index 000000000000..e7b649e2b8c0
--- /dev/null
+++ b/test/CodeGen/Hexagon/fcmp.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate floating point compare in V5
+
+; CHECK: p{{[0-2]+}} = sfcmp.{{.}}
+
+define i32 @foo(float %y) nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %y.addr = alloca float, align 4
+ store float %y, float* %y.addr, align 4
+ %0 = load float* %y.addr, align 4
+ %cmp = fcmp ogt float %0, 0x406AD7EFA0000000
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+if.else: ; preds = %entry
+ store i32 2, i32* %retval
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %1 = load i32* %retval
+ ret i32 %1
+}
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca float, align 4
+ store i32 0, i32* %retval
+ store float 0x40012E0A00000000, float* %a, align 4
+ %0 = load float* %a, align 4
+ %call = call i32 @foo(float %0)
+ ret i32 %call
+}
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
index 51acf2e501ce..bec9f5852e3c 100644
--- a/test/CodeGen/Hexagon/float.ll
+++ b/test/CodeGen/Hexagon/float.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: __hexagon_addsf3
; CHECK: __hexagon_subsf3
diff --git a/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll b/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
new file mode 100644
index 000000000000..bec9f5852e3c
--- /dev/null
+++ b/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_addsf3
+; CHECK: __hexagon_subsf3
+
+define void @foo(float* %acc, float %num, float %num2) nounwind {
+entry:
+ %acc.addr = alloca float*, align 4
+ %num.addr = alloca float, align 4
+ %num2.addr = alloca float, align 4
+ store float* %acc, float** %acc.addr, align 4
+ store float %num, float* %num.addr, align 4
+ store float %num2, float* %num2.addr, align 4
+ %0 = load float** %acc.addr, align 4
+ %1 = load float* %0
+ %2 = load float* %num.addr, align 4
+ %add = fadd float %1, %2
+ %3 = load float* %num2.addr, align 4
+ %sub = fsub float %add, %3
+ %4 = load float** %acc.addr, align 4
+ store float %sub, float* %4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/fmul.ll b/test/CodeGen/Hexagon/fmul.ll
new file mode 100644
index 000000000000..4766845b1143
--- /dev/null
+++ b/test/CodeGen/Hexagon/fmul.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate single precision floating point multiply in V5.
+
+; CHECK: r{{[0-9]+}} = sfmpy(r{{[0-9]+}}, r{{[0-9]+}})
+
+
+define i32 @main() nounwind {
+entry:
+ %a = alloca float, align 4
+ %b = alloca float, align 4
+ %c = alloca float, align 4
+ store float 0x402ECCCCC0000000, float* %a, align 4
+ store float 0x4022333340000000, float* %b, align 4
+ %0 = load float* %b, align 4
+ %1 = load float* %a, align 4
+ %mul = fmul float %0, %1
+ store float %mul, float* %c, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
index c0a9fda46894..dc87c732d6fe 100644
--- a/test/CodeGen/Hexagon/frame.ll
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
@num = external global i32
@acc = external global i32
diff --git a/test/CodeGen/Hexagon/fsub.ll b/test/CodeGen/Hexagon/fsub.ll
new file mode 100644
index 000000000000..07c866f4c2e2
--- /dev/null
+++ b/test/CodeGen/Hexagon/fsub.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that we generate sp floating point subtract in V5.
+
+; CHECK: r{{[0-9]+}} = sfsub(r{{[0-9]+}}, r{{[0-9]+}})
+
+define i32 @main() nounwind {
+entry:
+ %a = alloca float, align 4
+ %b = alloca float, align 4
+ %c = alloca float, align 4
+ store float 0x402ECCCCC0000000, float* %a, align 4
+ store float 0x4022333340000000, float* %b, align 4
+ %0 = load float* %b, align 4
+ %1 = load float* %a, align 4
+ %sub = fsub float %0, %1
+ store float %sub, float* %c, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/fusedandshift.ll b/test/CodeGen/Hexagon/fusedandshift.ll
new file mode 100644
index 000000000000..022b3c673458
--- /dev/null
+++ b/test/CodeGen/Hexagon/fusedandshift.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate fused logical and with shift instruction.
+
+; CHECK: r{{[0-9]+}} = and(#15, lsr(r{{[0-9]+}}, #{{[0-9]+}}))
+
+define i32 @main(i16* %a, i16* %b) nounwind {
+ entry:
+ %0 = load i16* %a, align 2
+ %conv1 = sext i16 %0 to i32
+ %shr1 = ashr i32 %conv1, 3
+ %and1 = and i32 %shr1, 15
+ %conv2 = trunc i32 %and1 to i16
+ store i16 %conv2, i16* %b, align 2
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Hexagon/macint.ll b/test/CodeGen/Hexagon/macint.ll
new file mode 100644
index 000000000000..b3b9d0ee7a01
--- /dev/null
+++ b/test/CodeGen/Hexagon/macint.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate integer multiply accumulate.
+
+; CHECK: r{{[0-9]+}} += mpyi(r{{[0-9]+}}, r{{[0-9]+}})
+
+define i32 @main(i32* %a, i32* %b) nounwind {
+ entry:
+ %0 = load i32* %a, align 4
+ %div = udiv i32 %0, 10000
+ %rem = urem i32 %div, 10
+ store i32 %rem, i32* %b, align 4
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
index afd6fc607188..d5c5ae345352 100644
--- a/test/CodeGen/Hexagon/mpy.ll
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: += mpyi
define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
diff --git a/test/CodeGen/Hexagon/newvaluejump.ll b/test/CodeGen/Hexagon/newvaluejump.ll
new file mode 100644
index 000000000000..9c7ca55cb8f6
--- /dev/null
+++ b/test/CodeGen/Hexagon/newvaluejump.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate new value jump.
+
+@i = global i32 0, align 4
+@j = global i32 10, align 4
+
+define i32 @foo(i32 %a) nounwind {
+entry:
+; CHECK: if (cmp.eq(r{{[0-9]+}}.new, #0)) jump{{.}}
+ %addr1 = alloca i32, align 4
+ %addr2 = alloca i32, align 4
+ %0 = load i32* @i, align 4
+ store i32 %0, i32* %addr1, align 4
+ call void @bar(i32 1, i32 2)
+ %1 = load i32* @j, align 4
+ %tobool = icmp ne i32 %1, 0
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then:
+ call void @baz(i32 1, i32 2)
+ br label %if.end
+
+if.else:
+ call void @guy(i32 10, i32 20)
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
+
+declare void @guy(i32, i32)
+declare void @bar(i32, i32)
+declare void @baz(i32, i32)
diff --git a/test/CodeGen/Hexagon/newvaluejump2.ll b/test/CodeGen/Hexagon/newvaluejump2.ll
new file mode 100644
index 000000000000..3d50ea5422c7
--- /dev/null
+++ b/test/CodeGen/Hexagon/newvaluejump2.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate new value jump, both registers, with one
+; of the registers as new.
+
+@Reg = common global i8 0, align 1
+define i32 @main() nounwind {
+entry:
+; CHECK: if (cmp.gt(r{{[0-9]+}}.new, r{{[0-9]+}})) jump:{{(t|nt)}} .LBB{{[0-9]+}}_{{[0-9]+}}
+ %Reg2 = alloca i8, align 1
+ %0 = load i8* %Reg2, align 1
+ %conv0 = zext i8 %0 to i32
+ %1 = load i8* @Reg, align 1
+ %conv1 = zext i8 %1 to i32
+ %tobool = icmp sle i32 %conv0, %conv1
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then:
+ call void @bar(i32 1, i32 2)
+ br label %if.end
+
+if.else:
+ call void @baz(i32 10, i32 20)
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
+
+declare void @bar(i32, i32)
+declare void @baz(i32, i32)
diff --git a/test/CodeGen/Hexagon/newvaluestore.ll b/test/CodeGen/Hexagon/newvaluestore.ll
new file mode 100644
index 000000000000..ab69b22df57c
--- /dev/null
+++ b/test/CodeGen/Hexagon/newvaluestore.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate new value store packet in V4
+
+@i = global i32 0, align 4
+@j = global i32 10, align 4
+@k = global i32 100, align 4
+
+define i32 @main() nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}.new
+ %number1 = alloca i32, align 4
+ %number2 = alloca i32, align 4
+ %number3 = alloca i32, align 4
+ %0 = load i32 * @i, align 4
+ store i32 %0, i32* %number1, align 4
+ %1 = load i32 * @j, align 4
+ store i32 %1, i32* %number2, align 4
+ %2 = load i32 * @k, align 4
+ store i32 %2, i32* %number3, align 4
+ ret i32 %0
+}
+
diff --git a/test/CodeGen/Hexagon/opt-fabs.ll b/test/CodeGen/Hexagon/opt-fabs.ll
new file mode 100644
index 000000000000..31b56fd6e982
--- /dev/null
+++ b/test/CodeGen/Hexagon/opt-fabs.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=hexagon-unknown-elf -mcpu=hexagonv5 < %s | FileCheck %s
+; Optimize fabsf to clrbit in V5.
+
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #31)
+
+define float @my_fabsf(float %x) nounwind { ; returns fabsf(%x) after a stack round-trip of the argument
+entry:
+ %x.addr = alloca float, align 4 ; stack slot for the argument
+ store float %x, float* %x.addr, align 4
+ %0 = load float* %x.addr, align 4 ; reload the argument from its slot
+ %call = call float @fabsf(float %0) readnone ; the call site is marked readnone
+ ret float %call
+}
+
+declare float @fabsf(float)
diff --git a/test/CodeGen/Hexagon/opt-fneg.ll b/test/CodeGen/Hexagon/opt-fneg.ll
new file mode 100644
index 000000000000..479b4b64069a
--- /dev/null
+++ b/test/CodeGen/Hexagon/opt-fneg.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Optimize fneg to togglebit in V5.
+
+define float @foo(float %x) nounwind { ; negates %x after a stack round-trip of the argument
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
+ %x.addr = alloca float, align 4 ; stack slot for the argument
+ store float %x, float* %x.addr, align 4
+ %0 = load float* %x.addr, align 4 ; reload the argument from its slot
+ %sub = fsub float -0.000000e+00, %0 ; negation written as (-0.0) - x
+ ret float %sub
+}
+
+define float @bar(float %x) nounwind { ; negates %x directly, with no stack slot
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
+ %sub = fsub float -0.000000e+00, %x ; negation written as (-0.0) - x
+ ret float %sub
+}
+
+define float @baz(float %x) nounwind { ; negates %x by multiplying it by -1.0
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
+ %conv1 = fmul float %x, -1.000000e+00 ; x * (-1.0)
+ ret float %conv1
+}
diff --git a/test/CodeGen/Hexagon/simpletailcall.ll b/test/CodeGen/Hexagon/simpletailcall.ll
new file mode 100644
index 000000000000..287640489a5e
--- /dev/null
+++ b/test/CodeGen/Hexagon/simpletailcall.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: foo_empty
+; CHECK-NOT: allocframe
+; CHECK-NOT: memd(r29
+; CHECK: jump bar_empty
+
+define void @foo_empty(i32 %h) nounwind { ; tail-calls @bar_empty with %h + 3 and returns nothing
+entry:
+ %add = add nsw i32 %h, 3
+ %call = tail call i32 bitcast (i32 (...)* @bar_empty to i32 (i32)*)(i32 %add) nounwind ; @bar_empty is declared variadic, so it is cast to i32 (i32)* for this call
+ ret void ; the i32 result held in %call is not used
+}
+
+declare i32 @bar_empty(...)
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
index c63a3ba7fd74..2e4ab633e415 100644
--- a/test/CodeGen/Hexagon/static.ll
+++ b/test/CodeGen/Hexagon/static.ll
@@ -1,13 +1,12 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
@num = external global i32
@acc = external global i32
@val = external global i32
-; CHECK: CONST32(#num)
-; CHECK: CONST32(#acc)
-; CHECK: CONST32(#val)
+; CHECK: memw(##num)
+; CHECK: memw(##acc)
+; CHECK: memw(##val)
define void @foo() nounwind {
entry:
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index 2c962d0961d4..e488f33c3d16 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,6 +1,6 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}})
+; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
%struct.small = type { i32, i32 }
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
index 69de4f66a92b..f09fd10cc84d 100644
--- a/test/CodeGen/Hexagon/struct_args_large.ll
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -1,8 +1,7 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
-; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
-; CHECK: memw(r29 + #0) = r[[T1]]
+; CHECK: memw(r29 + #0) = r{{.}}
+; CHECK: memw(r29+#8) = r{{.}}
%struct.large = type { i64, i64 }
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
index 788e4749f5a2..01d20410978e 100644
--- a/test/CodeGen/Hexagon/vaddh.ll
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
@j = external global i32
diff --git a/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll b/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
index b92477bed578..c3d69c7c0db5 100644
--- a/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
+++ b/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
@@ -5,9 +5,9 @@ target triple = "msp430-unknown-linux-gnu"
define msp430_intrcc void @foo() nounwind {
entry:
- %fa = call i16* @llvm.frameaddress(i32 0)
- store i16 0, i16* %fa
+ %fa = call i8* @llvm.frameaddress(i32 0)
+ store i8 0, i8* %fa
ret void
}
-declare i16* @llvm.frameaddress(i32)
+declare i8* @llvm.frameaddress(i32)
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
index 45342e2ee9b1..b9c17d91ef5b 100644
--- a/test/CodeGen/MSP430/Inst8rr.ll
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -4,7 +4,7 @@ target triple = "msp430-generic-generic"
define i8 @mov(i8 %a, i8 %b) nounwind {
; CHECK: mov:
-; CHECK: mov.b r14, r15
+; CHECK: mov.{{[bw]}} r14, r15
ret i8 %b
}
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 519e4b93a72b..9c547f15c9a6 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=mips -o %t
-; RUN: grep {c\\..*\\.s} %t | count 3
-; RUN: grep {bc1\[tf\]} %t | count 3
+; RUN: grep "c\..*\.s" %t | count 3
+; RUN: grep "bc1[tf]" %t | count 3
; FIXME: Disabled because branch instructions are generated where
; conditional move instructions are expected.
diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll
index e85a749f7dcd..e88e3d3755c4 100644
--- a/test/CodeGen/Mips/2008-07-29-icmp.ll
+++ b/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1
+; RUN: llc < %s -march=mips | grep "b[ne][eq]" | count 1
; FIXME: Disabled because branch instructions are generated where
; conditional move instructions are expected.
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index aaf6767a3bda..261fe9db1732 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -7,19 +7,22 @@ entry:
%x = alloca i32, align 4 ; <i32*> [#uses=2]
store volatile i32 2, i32* %x, align 4
%0 = load volatile i32* %x, align 4 ; <i32> [#uses=1]
-; STATIC-O32: lui $[[R0:[0-9]+]], %hi($JTI0_0)
-; STATIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
-; STATIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
-; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0)
-; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
-; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
-; PIC-O32: addu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
-; PIC-O32: jr $[[R1]]
-; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
-; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
-; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
-; PIC-N64: daddu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
-; PIC-N64: jr $[[R1]]
+; STATIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; STATIC-O32: lui $[[R1:[0-9]+]], %hi($JTI0_0)
+; STATIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; STATIC-O32: lw $[[R3:[0-9]+]], %lo($JTI0_0)($[[R2]])
+; PIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; PIC-O32: lw $[[R1:[0-9]+]], %got($JTI0_0)
+; PIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; PIC-O32: lw $[[R4:[0-9]+]], %lo($JTI0_0)($[[R2]])
+; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
+; PIC-O32: jr $[[R5]]
+; PIC-N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3
+; PIC-N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0)
+; PIC-N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]]
+; PIC-N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]])
+; PIC-N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
+; PIC-N64: jr $[[R5]]
switch i32 %0, label %bb4 [
i32 0, label %bb5
i32 1, label %bb1
@@ -30,7 +33,6 @@ entry:
bb1: ; preds = %entry
ret i32 2
-; CHECK: STATIC-O32: $BB0_2
bb2: ; preds = %entry
ret i32 0
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index 15c73e225300..29f43c8afa18 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -4,14 +4,10 @@ define i32 @twoalloca(i32 %size) nounwind {
entry:
; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
; CHECK: addu $sp, $zero, $[[T0]]
-; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF:[0-9]+]]
; CHECK: subu $[[T2:[0-9]+]], $sp, $[[SZ]]
; CHECK: addu $sp, $zero, $[[T2]]
-; CHECK: addiu $[[T3:[0-9]+]], $sp, [[OFF]]
-; CHECK: lw $[[T4:[0-9]+]], %call16(foo)($gp)
-; CHECK: addu $25, $zero, $[[T4]]
-; CHECK: addu $4, $zero, $[[T1]]
-; CHECK: jalr $25
+; CHECK: addu $4, $zero, $[[T0]]
+; CHECK: addu $4, $zero, $[[T2]]
%tmp1 = alloca i8, i32 %size, align 4
%add.ptr = getelementptr inbounds i8* %tmp1, i32 5
store i8 97, i8* %add.ptr, align 1
@@ -31,14 +27,9 @@ declare i32 @foo(i8*)
define i32 @alloca2(i32 %size) nounwind {
entry:
-; dynamic allocated stack area and $gp restore slot have the same offsets
-; relative to $sp.
-;
; CHECK: alloca2
-; CHECK: .cprestore [[OFF:[0-9]+]]
-; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
+; CHECK: subu $[[T0:[0-9]+]], $sp
; CHECK: addu $sp, $zero, $[[T0]]
-; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF]]
%tmp1 = alloca i8, i32 %size, align 4
%0 = bitcast i8* %tmp1 to i32*
@@ -46,7 +37,7 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
-; CHECK: addiu $4, $[[T1]], 40
+; CHECK: addiu $4, $[[T0]], 40
%add.ptr = getelementptr inbounds i8* %tmp1, i32 40
%1 = bitcast i8* %add.ptr to i32*
@@ -56,7 +47,7 @@ if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
-; CHECK: addiu $4, $[[T1]], 12
+; CHECK: addiu $4, $[[T0]], 12
%add.ptr5 = getelementptr inbounds i8* %tmp1, i32 12
%2 = bitcast i8* %add.ptr5 to i32*
@@ -64,7 +55,7 @@ if.else: ; preds = %entry
br label %if.end
if.end: ; preds = %if.else, %if.then
-; CHECK: lw $5, 0($[[T1]])
+; CHECK: lw $5, 0($[[T0]])
; CHECK: lw $25, %call16(printf)
%.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ]
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index bc5bcc391ba3..8ec5d9313994 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -2,9 +2,8 @@
define double @foo(double %a, double %b) nounwind readnone {
entry:
-; CHECK: bc1f $BB0_2
+; CHECK: bc1f $BB
; CHECK: nop
-; CHECK: # BB#1:
%cmp = fcmp ogt double %a, 0.000000e+00
br i1 %cmp, label %if.end6, label %if.else
@@ -26,9 +25,8 @@ return: ; preds = %if.else, %if.end6
define void @f1(float %f) nounwind {
entry:
-; CHECK: bc1f $BB1_1
+; CHECK: bc1f $BB
; CHECK: nop
-; CHECK: # BB#2:
%cmp = fcmp une float %f, 0.000000e+00
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/and1.ll b/test/CodeGen/Mips/and1.ll
new file mode 100644
index 000000000000..4ff1204fe7ae
--- /dev/null
+++ b/test/CodeGen/Mips/and1.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4 ; 65504 is 0xFFE0
+@y = global i32 60929, align 4 ; 60929 is 0xEE01
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1 ; printf format used by main
+
+define i32 @main() nounwind { ; prints the bitwise AND of @x and @y, then returns 0
+entry:
+ %0 = load i32* @x, align 4
+ %1 = load i32* @y, align 4
+ %and = and i32 %0, %1 ; with the initial values above this is 0xEE00
+; 16: and ${{[0-9]+}}, ${{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %and)
+ ret i32 0 ; the printf result in %call is not used
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/asm-large-immediate.ll b/test/CodeGen/Mips/asm-large-immediate.ll
new file mode 100644
index 000000000000..246fff615edb
--- /dev/null
+++ b/test/CodeGen/Mips/asm-large-immediate.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+define void @test() { ; emits two inline-asm comments that print a 64-bit immediate operand
+entry:
+; CHECK: /* result: 68719476738 */
+ tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) ; 68719476738 = 2^36 + 2; the pattern above expects the 'c' modifier to print it unchanged
+; CHECK: /* result: -68719476738 */
+ tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) ; same operand; the pattern above expects the 'n' modifier to print it negated
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index a4763b130d46..050689dcea6c 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -8,7 +8,7 @@ entry:
ret i32 %0
; CHECK: AtomicLoadAdd32:
-; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(x)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4
@@ -22,7 +22,7 @@ entry:
ret i32 %0
; CHECK: AtomicLoadNand32:
-; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(x)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
; CHECK: and $[[R3:[0-9]+]], $[[R1]], $4
@@ -40,7 +40,7 @@ entry:
ret i32 %0
; CHECK: AtomicSwap32:
-; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(x)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll ${{[0-9]+}}, 0($[[R0]])
; CHECK: sc $[[R2:[0-9]+]], 0($[[R0]])
@@ -56,7 +56,7 @@ entry:
ret i32 %0
; CHECK: AtomicCmpSwap32:
-; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(x)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $2, 0($[[R0]])
; CHECK: bne $2, $4, $[[BB1:[A-Z_0-9]+]]
@@ -75,7 +75,7 @@ entry:
ret i8 %0
; CHECK: AtomicLoadAdd8:
-; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(y)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
@@ -106,7 +106,7 @@ entry:
ret i8 %0
; CHECK: AtomicLoadSub8:
-; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(y)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
@@ -137,7 +137,7 @@ entry:
ret i8 %0
; CHECK: AtomicLoadNand8:
-; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(y)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
@@ -169,7 +169,7 @@ entry:
ret i8 %0
; CHECK: AtomicSwap8:
-; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(y)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
@@ -198,7 +198,7 @@ entry:
ret i8 %0
; CHECK: AtomicCmpSwap8:
-; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(y)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
@@ -242,3 +242,19 @@ entry:
; CHECK: sync 0
}
+; Make sure that this assertion in
+; TwoAddressInstructionPass::TryInstructionTransform does not fail:
+;
+; line 1203: assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+;
+; It failed when MipsDAGToDAGISel::ReplaceUsesWithZeroReg replaced an
+; operand of an atomic instruction with register $zero.
+@a = external global i32
+
+define i32 @zeroreg() nounwind { ; returns 1 if the cmpxchg below read the value 1 from @a, else 0
+entry:
+ %0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst ; compare @a against 1 and store the constant 0 on a match
+ %1 = icmp eq i32 %0, 1 ; true when the value read from @a was 1
+ %conv = zext i1 %1 to i32
+ ret i32 %conv
+}
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 03254a9a799a..3af899a4e258 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -5,10 +5,12 @@
@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
@i3 = common global i32* null, align 4
-; O32: lw ${{[0-9]+}}, %got(i3)($gp)
-; O32: addiu ${{[0-9]+}}, $gp, %got(i1)
-; N64: ld ${{[0-9]+}}, %got_disp(i3)($gp)
-; N64: daddiu ${{[0-9]+}}, $gp, %got_disp(i1)
+; O32: lw $[[R0:[0-9]+]], %got(i3)
+; O32: addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
+; O32: movn $[[R0]], $[[R1]], ${{[0-9]+}}
+; N64: ldr $[[R0:[0-9]+]]
+; N64: ld $[[R1:[0-9]+]], %got_disp(i1)
+; N64: movn $[[R0]], $[[R1]], ${{[0-9]+}}
define i32* @cmov1(i32 %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
@@ -21,12 +23,12 @@ entry:
@d = global i32 0, align 4
; O32: cmov2:
-; O32: addiu $[[R1:[0-9]+]], $gp, %got(d)
-; O32: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; O32: addiu $[[R1:[0-9]+]], ${{[a-z0-9]+}}, %got(d)
+; O32: addiu $[[R0:[0-9]+]], ${{[a-z0-9]+}}, %got(c)
; O32: movn $[[R1]], $[[R0]], ${{[0-9]+}}
; N64: cmov2:
-; N64: daddiu $[[R1:[0-9]+]], $gp, %got_disp(d)
-; N64: daddiu $[[R0:[0-9]+]], $gp, %got_disp(c)
+; N64: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
+; N64: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
; N64: movn $[[R1]], $[[R0]], ${{[0-9]+}}
define i32 @cmov2(i32 %s) nounwind readonly {
entry:
@@ -37,3 +39,23 @@ entry:
ret i32 %cond
}
+; O32: cmov3:
+; O32: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
+; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone { ; returns %b when %a == 234, otherwise %c
+entry:
+ %cmp = icmp eq i32 %a, 234
+ %cond = select i1 %cmp, i32 %b, i32 %c ; i32 select on the equality result
+ ret i32 %cond
+}
+
+; N64: cmov4:
+; N64: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
+; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone { ; returns %b when %a == 234, otherwise %c
+entry:
+ %cmp = icmp eq i32 %a, 234 ; the compared value is i32 while the selected values are i64
+ %cond = select i1 %cmp, i64 %b, i64 %c
+ ret i64 %cond
+}
+
diff --git a/test/CodeGen/Mips/cprestore.ll b/test/CodeGen/Mips/cprestore.ll
index 57d022f47c82..a618b675c559 100644
--- a/test/CodeGen/Mips/cprestore.ll
+++ b/test/CodeGen/Mips/cprestore.ll
@@ -1,4 +1,6 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s
+; DISABLE: llc -march=mipsel < %s | FileCheck %s
+; RUN: false
+; XFAIL: *
; CHECK: .set macro
; CHECK: .set at
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index 2e2f9a451ed1..d14150a68a56 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -15,7 +15,6 @@ entry:
; CHECK-EB: .cfi_offset 53, -8
; CHECK-EB: .cfi_offset 52, -4
; CHECK-EL: .cfi_offset 31, -12
-; CHECK-EL: .cprestore
%exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind
%0 = bitcast i8* %exception to double*
@@ -25,7 +24,6 @@ entry:
lpad: ; preds = %entry
; CHECK-EL: # %lpad
-; CHECK-EL: lw $gp
; CHECK-EL: bne $5
%exn.val = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
index b296ab390d56..49d8a7201e8b 100644
--- a/test/CodeGen/Mips/fabs.ll
+++ b/test/CodeGen/Mips/fabs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
-; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
-; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
define float @foo0(float %a) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll
new file mode 100644
index 000000000000..82919e7139bd
--- /dev/null
+++ b/test/CodeGen/Mips/fastcc.ll
@@ -0,0 +1,253 @@
+; RUN: llc < %s -march=mipsel | FileCheck %s
+
+@gi0 = external global i32
+@gi1 = external global i32
+@gi2 = external global i32
+@gi3 = external global i32
+@gi4 = external global i32
+@gi5 = external global i32
+@gi6 = external global i32
+@gi7 = external global i32
+@gi8 = external global i32
+@gi9 = external global i32
+@gi10 = external global i32
+@gi11 = external global i32
+@gi12 = external global i32
+@gi13 = external global i32
+@gi14 = external global i32
+@gi15 = external global i32
+@gi16 = external global i32
+@gfa0 = external global float
+@gfa1 = external global float
+@gfa2 = external global float
+@gfa3 = external global float
+@gfa4 = external global float
+@gfa5 = external global float
+@gfa6 = external global float
+@gfa7 = external global float
+@gfa8 = external global float
+@gfa9 = external global float
+@gfa10 = external global float
+@gfa11 = external global float
+@gfa12 = external global float
+@gfa13 = external global float
+@gfa14 = external global float
+@gfa15 = external global float
+@gfa16 = external global float
+@gfa17 = external global float
+@gfa18 = external global float
+@gfa19 = external global float
+@gfa20 = external global float
+@gf0 = external global float
+@gf1 = external global float
+@gf2 = external global float
+@gf3 = external global float
+@gf4 = external global float
+@gf5 = external global float
+@gf6 = external global float
+@gf7 = external global float
+@gf8 = external global float
+@gf9 = external global float
+@gf10 = external global float
+@gf11 = external global float
+@gf12 = external global float
+@gf13 = external global float
+@gf14 = external global float
+@gf15 = external global float
+@gf16 = external global float
+@gf17 = external global float
+@gf18 = external global float
+@gf19 = external global float
+@gf20 = external global float
+@g0 = external global i32
+@g1 = external global i32
+@g2 = external global i32
+@g3 = external global i32
+@g4 = external global i32
+@g5 = external global i32
+@g6 = external global i32
+@g7 = external global i32
+@g8 = external global i32
+@g9 = external global i32
+@g10 = external global i32
+@g11 = external global i32
+@g12 = external global i32
+@g13 = external global i32
+@g14 = external global i32
+@g15 = external global i32
+@g16 = external global i32
+
+define void @caller0() nounwind { ; loads @gi0..@gi16 and passes all 17 values to the fastcc @callee0
+entry:
+; CHECK: caller0
+; CHECK: lw $3
+; CHECK: lw $24
+; CHECK: lw $15
+; CHECK: lw $14
+; CHECK: lw $13
+; CHECK: lw $12
+; CHECK: lw $11
+; CHECK: lw $10
+; CHECK: lw $9
+; CHECK: lw $8
+; CHECK: lw $7
+; CHECK: lw $6
+; CHECK: lw $5
+; CHECK: lw $4
+
+ %0 = load i32* @gi0, align 4 ; first of 17 loads, one per external i32 global
+ %1 = load i32* @gi1, align 4
+ %2 = load i32* @gi2, align 4
+ %3 = load i32* @gi3, align 4
+ %4 = load i32* @gi4, align 4
+ %5 = load i32* @gi5, align 4
+ %6 = load i32* @gi6, align 4
+ %7 = load i32* @gi7, align 4
+ %8 = load i32* @gi8, align 4
+ %9 = load i32* @gi9, align 4
+ %10 = load i32* @gi10, align 4
+ %11 = load i32* @gi11, align 4
+ %12 = load i32* @gi12, align 4
+ %13 = load i32* @gi13, align 4
+ %14 = load i32* @gi14, align 4
+ %15 = load i32* @gi15, align 4
+ %16 = load i32* @gi16, align 4
+ tail call fastcc void @callee0(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16) ; arguments are passed in load order
+ ret void
+}
+
+define internal fastcc void @callee0(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) nounwind noinline { ; stores each of its 17 i32 arguments to the matching global @g0..@g16
+entry:
+; CHECK: callee0
+; CHECK: sw $4
+; CHECK: sw $5
+; CHECK: sw $6
+; CHECK: sw $7
+; CHECK: sw $8
+; CHECK: sw $9
+; CHECK: sw $10
+; CHECK: sw $11
+; CHECK: sw $12
+; CHECK: sw $13
+; CHECK: sw $14
+; CHECK: sw $15
+; CHECK: sw $24
+; CHECK: sw $3
+
+ store i32 %a0, i32* @g0, align 4 ; argument N goes to @gN for N = 0..16
+ store i32 %a1, i32* @g1, align 4
+ store i32 %a2, i32* @g2, align 4
+ store i32 %a3, i32* @g3, align 4
+ store i32 %a4, i32* @g4, align 4
+ store i32 %a5, i32* @g5, align 4
+ store i32 %a6, i32* @g6, align 4
+ store i32 %a7, i32* @g7, align 4
+ store i32 %a8, i32* @g8, align 4
+ store i32 %a9, i32* @g9, align 4
+ store i32 %a10, i32* @g10, align 4
+ store i32 %a11, i32* @g11, align 4
+ store i32 %a12, i32* @g12, align 4
+ store i32 %a13, i32* @g13, align 4
+ store i32 %a14, i32* @g14, align 4
+ store i32 %a15, i32* @g15, align 4
+ store i32 %a16, i32* @g16, align 4
+ ret void
+}
+
+define void @caller1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind { ; loads @gfa0..@gfa20 and passes all 21 values to the fastcc @callee1; its own parameters are not read
+entry:
+; CHECK: caller1
+; CHECK: lwc1 $f19
+; CHECK: lwc1 $f18
+; CHECK: lwc1 $f17
+; CHECK: lwc1 $f16
+; CHECK: lwc1 $f15
+; CHECK: lwc1 $f14
+; CHECK: lwc1 $f13
+; CHECK: lwc1 $f12
+; CHECK: lwc1 $f11
+; CHECK: lwc1 $f10
+; CHECK: lwc1 $f9
+; CHECK: lwc1 $f8
+; CHECK: lwc1 $f7
+; CHECK: lwc1 $f6
+; CHECK: lwc1 $f5
+; CHECK: lwc1 $f4
+; CHECK: lwc1 $f3
+; CHECK: lwc1 $f2
+; CHECK: lwc1 $f1
+; CHECK: lwc1 $f0
+
+ %0 = load float* @gfa0, align 4 ; first of 21 loads, one per external float global
+ %1 = load float* @gfa1, align 4
+ %2 = load float* @gfa2, align 4
+ %3 = load float* @gfa3, align 4
+ %4 = load float* @gfa4, align 4
+ %5 = load float* @gfa5, align 4
+ %6 = load float* @gfa6, align 4
+ %7 = load float* @gfa7, align 4
+ %8 = load float* @gfa8, align 4
+ %9 = load float* @gfa9, align 4
+ %10 = load float* @gfa10, align 4
+ %11 = load float* @gfa11, align 4
+ %12 = load float* @gfa12, align 4
+ %13 = load float* @gfa13, align 4
+ %14 = load float* @gfa14, align 4
+ %15 = load float* @gfa15, align 4
+ %16 = load float* @gfa16, align 4
+ %17 = load float* @gfa17, align 4
+ %18 = load float* @gfa18, align 4
+ %19 = load float* @gfa19, align 4
+ %20 = load float* @gfa20, align 4
+ tail call fastcc void @callee1(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16, float %17, float %18, float %19, float %20) ; passes the loaded values, in load order
+ ret void
+}
+
+define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline { ; stores each of its 21 float arguments to the matching global @gf0..@gf20
+entry:
+; CHECK: callee1
+; CHECK: swc1 $f0
+; CHECK: swc1 $f1
+; CHECK: swc1 $f2
+; CHECK: swc1 $f3
+; CHECK: swc1 $f4
+; CHECK: swc1 $f5
+; CHECK: swc1 $f6
+; CHECK: swc1 $f7
+; CHECK: swc1 $f8
+; CHECK: swc1 $f9
+; CHECK: swc1 $f10
+; CHECK: swc1 $f11
+; CHECK: swc1 $f12
+; CHECK: swc1 $f13
+; CHECK: swc1 $f14
+; CHECK: swc1 $f15
+; CHECK: swc1 $f16
+; CHECK: swc1 $f17
+; CHECK: swc1 $f18
+; CHECK: swc1 $f19
+
+ store float %a0, float* @gf0, align 4 ; argument N goes to @gfN for N = 0..20
+ store float %a1, float* @gf1, align 4
+ store float %a2, float* @gf2, align 4
+ store float %a3, float* @gf3, align 4
+ store float %a4, float* @gf4, align 4
+ store float %a5, float* @gf5, align 4
+ store float %a6, float* @gf6, align 4
+ store float %a7, float* @gf7, align 4
+ store float %a8, float* @gf8, align 4
+ store float %a9, float* @gf9, align 4
+ store float %a10, float* @gf10, align 4
+ store float %a11, float* @gf11, align 4
+ store float %a12, float* @gf12, align 4
+ store float %a13, float* @gf13, align 4
+ store float %a14, float* @gf14, align 4
+ store float %a15, float* @gf15, align 4
+ store float %a16, float* @gf16, align 4
+ store float %a17, float* @gf17, align 4
+ store float %a18, float* @gf18, align 4
+ store float %a19, float* @gf19, align 4
+ store float %a20, float* @gf20, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
index 08bd6e72ae77..1c4a3fdb4a42 100644
--- a/test/CodeGen/Mips/fp-indexed-ls.ll
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -28,7 +28,7 @@ entry:
define float @foo2(i32 %b, i32 %c) nounwind readonly {
entry:
-; CHECK: luxc1
+; CHECK-NOT: luxc1
%arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
%0 = load float* %arrayidx1, align 1
ret float %0
@@ -54,7 +54,7 @@ entry:
define void @foo5(i32 %b, i32 %c) nounwind {
entry:
-; CHECK: suxc1
+; CHECK-NOT: suxc1
%0 = load float* @gf, align 4
%arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
store float %0, float* %arrayidx1, align 1
@@ -64,7 +64,7 @@ entry:
define double @foo6(i32 %b, i32 %c) nounwind readonly {
entry:
; CHECK: foo6
-; CHECK-NOT: ldxc1
+; CHECK-NOT: luxc1
%arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
%0 = load double* %arrayidx1, align 1
ret double %0
@@ -73,7 +73,7 @@ entry:
define void @foo7(i32 %b, i32 %c) nounwind {
entry:
; CHECK: foo7
-; CHECK-NOT: sdxc1
+; CHECK-NOT: suxc1
%0 = load double* @gd, align 8
%arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
store double %0, double* %arrayidx1, align 1
@@ -83,7 +83,7 @@ entry:
define float @foo8() nounwind readonly {
entry:
; CHECK: foo8
-; CHECK: luxc1
+; CHECK-NOT: luxc1
%0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
ret float %0
}
@@ -91,7 +91,7 @@ entry:
define void @foo9(float %f) nounwind {
entry:
; CHECK: foo9
-; CHECK: suxc1
+; CHECK-NOT: suxc1
store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
ret void
}
diff --git a/test/CodeGen/Mips/fp-spill-reload.ll b/test/CodeGen/Mips/fp-spill-reload.ll
new file mode 100644
index 000000000000..f9887a55827f
--- /dev/null
+++ b/test/CodeGen/Mips/fp-spill-reload.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; check that $fp is not reserved.
+
+define void @foo0(i32* nocapture %b) nounwind { ; reads %b[0..7], adds the index to elements 1..7, and passes the eight values to @foo2 twice
+entry:
+; CHECK: sw $fp
+; CHECK: lw $fp
+ %0 = load i32* %b, align 4 ; %b[0] is passed through unmodified
+ %arrayidx.1 = getelementptr inbounds i32* %b, i32 1
+ %1 = load i32* %arrayidx.1, align 4
+ %add.1 = add nsw i32 %1, 1 ; %b[1] + 1
+ %arrayidx.2 = getelementptr inbounds i32* %b, i32 2
+ %2 = load i32* %arrayidx.2, align 4
+ %add.2 = add nsw i32 %2, 2 ; %b[2] + 2
+ %arrayidx.3 = getelementptr inbounds i32* %b, i32 3
+ %3 = load i32* %arrayidx.3, align 4
+ %add.3 = add nsw i32 %3, 3 ; %b[3] + 3
+ %arrayidx.4 = getelementptr inbounds i32* %b, i32 4
+ %4 = load i32* %arrayidx.4, align 4
+ %add.4 = add nsw i32 %4, 4 ; %b[4] + 4
+ %arrayidx.5 = getelementptr inbounds i32* %b, i32 5
+ %5 = load i32* %arrayidx.5, align 4
+ %add.5 = add nsw i32 %5, 5 ; %b[5] + 5
+ %arrayidx.6 = getelementptr inbounds i32* %b, i32 6
+ %6 = load i32* %arrayidx.6, align 4
+ %add.6 = add nsw i32 %6, 6 ; %b[6] + 6
+ %arrayidx.7 = getelementptr inbounds i32* %b, i32 7
+ %7 = load i32* %arrayidx.7, align 4
+ %add.7 = add nsw i32 %7, 7 ; %b[7] + 7
+ call void @foo2(i32 %0, i32 %add.1, i32 %add.2, i32 %add.3, i32 %add.4, i32 %add.5, i32 %add.6, i32 %add.7) nounwind ; first use of the eight values
+ call void bitcast (void (...)* @foo1 to void ()*)() nounwind ; @foo1 is declared variadic and is cast to void ()* for the call
+ call void @foo2(i32 %0, i32 %add.1, i32 %add.2, i32 %add.3, i32 %add.4, i32 %add.5, i32 %add.6, i32 %add.7) nounwind ; same eight values again, so they are all still needed after the @foo1 call
+ ret void
+}
+
+declare void @foo2(i32, i32, i32, i32, i32, i32, i32, i32)
+
+declare void @foo1(...)
+
diff --git a/test/CodeGen/Mips/global-pointer-reg.ll b/test/CodeGen/Mips/global-pointer-reg.ll
index 174d1f9cbe90..1c0eb01b67c1 100644
--- a/test/CodeGen/Mips/global-pointer-reg.ll
+++ b/test/CodeGen/Mips/global-pointer-reg.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s
+; DISABLED: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s
+; RUN: false
+; XFAIL: *
@g0 = external global i32
@g1 = external global i32
diff --git a/test/CodeGen/Mips/gprestore.ll b/test/CodeGen/Mips/gprestore.ll
index ee7e1315df0e..cbcf0c93491c 100644
--- a/test/CodeGen/Mips/gprestore.ll
+++ b/test/CodeGen/Mips/gprestore.ll
@@ -1,4 +1,6 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; DISABLE: llc -march=mips < %s | FileCheck %s
+; RUN: false
+; XFAIL: *
@p = external global i32
@q = external global i32
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
new file mode 100644
index 000000000000..bee93accd428
--- /dev/null
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
+;
+; re-enable this when mips16's jalr is fixed.
+; DISABLED: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+
+
+@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+ ret i32 0
+
+; SR: .set mips16 # @main
+
+; SR: save $ra, [[FS:[0-9]+]]
+; PE: li $[[T1:[0-9]+]], %hi(_gp_disp)
+; PE: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp)
+; PE: sll $[[T3:[0-9]+]], $[[T1]], 16
+; C1: lw ${{[0-9]+}}, %got($.str)(${{[0-9]+}})
+; C2: lw ${{[0-9]+}}, %call16(printf)(${{[0-9]+}})
+; C1: addiu ${{[0-9]+}}, %lo($.str)
+; C2: move $25, ${{[0-9]+}}
+; C1: move $gp, ${{[0-9]+}}
+; C1: jalr ${{[0-9]+}}
+; SR: restore $ra, [[FS]]
+; PE: li $2, 0
+; PE: jr $ra
+
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
new file mode 100644
index 000000000000..f9e53cbb07a4
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
@@ -0,0 +1,15 @@
+;
+;This is a negative test. The constant value given for the constraint
+;is greater than 16 bits.
+;
+; RUN: not llc -march=mipsel < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS: error: invalid operand for inline asm constraint 'I'
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
new file mode 100644
index 000000000000..1fdf672fe197
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constant value given for the constraint (J)
+;is non-zero (3).
+;
+; RUN: not llc -march=mipsel < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS: error: invalid operand for inline asm constraint 'J'
+
+ tail call i32 asm "addi $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll
new file mode 100644
index 000000000000..3baf437324ac
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constant value given for the constraint (K)
+;is greater than 16 bits (0x00100000).
+;
+; RUN: not llc -march=mipsel < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS: error: invalid operand for inline asm constraint 'K'
+
+ tail call i32 asm "addu $0,$1,$2", "=r,r,K"(i32 1024, i32 1048576) nounwind
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
new file mode 100644
index 000000000000..49dcc8745857
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constant value given for the constraint (L)
+;is non-zero in the lower 16 bits (0x00100003).
+;
+; RUN: not llc -march=mipsel < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS: error: invalid operand for inline asm constraint 'L'
+
+ tail call i32 asm "addi $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
new file mode 100644
index 000000000000..770669d913e8
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
@@ -0,0 +1,17 @@
+
+;This is a negative test. The constraint (N) requires an
+;immediate in the range of -65535 to -1 (inclusive).
+;Our example uses the positive value 3.
+;
+; RUN: not llc -march=mipsel < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS: error: invalid operand for inline asm constraint 'N'
+
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
new file mode 100644
index 000000000000..cd4431ac5265
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
@@ -0,0 +1,16 @@
+;
+;This is a negative test. The constraint (O) requires a
+;signed 15 bit immediate (+- 16383).
+;Our example uses the positive value 16384.
+;
+; RUN: not llc -march=mipsel < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS: error: invalid operand for inline asm constraint 'O'
+
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind
+ ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
new file mode 100644
index 000000000000..0a4739ebb96b
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
@@ -0,0 +1,16 @@
+;
+; This is a negative test. The constraint (P) requires
+; a constant in the range of 1 to 65535 inclusive.
+; Our example uses the positive value 655536.
+;
+; RUN: not llc -march=mipsel < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+define i32 @main() nounwind {
+entry:
+
+;CHECK-ERRORS: error: invalid operand for inline asm constraint 'P'
+
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind
+ ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
new file mode 100644
index 000000000000..94ded307fda9
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -0,0 +1,44 @@
+; Positive test for inline register constraints
+;
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @main() nounwind {
+entry:
+
+; r with char
+;CHECK: #APP
+;CHECK: addi ${{[0-9]+}},${{[0-9]+}},23
+;CHECK: #NO_APP
+ tail call i8 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind
+
+; r with short
+;CHECK: #APP
+;CHECK: addi ${{[0-9]+}},${{[0-9]+}},13
+;CHECK: #NO_APP
+ tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind
+
+; r with int
+;CHECK: #APP
+;CHECK: addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind
+
+; Now c with 1024: make sure register $25 is picked
+; CHECK: #APP
+; CHECK: addi $25,${{[0-9]+}},1024
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind
+
+; Now l: make sure register lo is picked. We do this by checking the instruction
+; after the inline expression for a mflo to pull the value out of lo.
+; CHECK: #APP
+; CHECK-NEXT: mtlo ${{[0-9]+}}
+; CHECK-NEXT: madd ${{[0-9]+}},${{[0-9]+}}
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: mflo ${{[0-9]+}}
+ %bosco = alloca i32, align 4
+ call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1,$2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
+ store volatile i32 %4, i32* %bosco, align 4
+
+ ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
new file mode 100644
index 000000000000..787066602575
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
@@ -0,0 +1,20 @@
+;
+; Register constraint "r" shouldn't take long long unless
+; the target is 64 bit.
+;
+;
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+
+define i32 @main() nounwind {
+entry:
+
+
+; r with long long
+;CHECK: #APP
+;CHECK: addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK: #NO_APP
+ tail call i64 asm sideeffect "addi $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
new file mode 100644
index 000000000000..01978994b2f5
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -0,0 +1,153 @@
+; Positive test for inline asm operand codes
+;
+; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=CHECK_LITTLE_32 %s
+; RUN: llc -march=mips < %s | FileCheck -check-prefix=CHECK_BIG_32 %s
+
+%union.u_tag = type { i64 }
+%struct.anon = type { i32, i32 }
+@uval = common global %union.u_tag zeroinitializer, align 8
+
+; X with -3
+define i32 @constraint_X() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_X:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
+;CHECK_LITTLE_32: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ;
+ ret i32 0
+}
+
+; x with -3
+define i32 @constraint_x() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_x:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffd
+;CHECK_LITTLE_32: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ;
+ ret i32 0
+}
+
+; d with -3
+define i32 @constraint_d() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_d:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ;
+ ret i32 0
+}
+
+; m with -3
+define i32 @constraint_m() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_m:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-4
+;CHECK_LITTLE_32: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ;
+ ret i32 0
+}
+
+; z with -3
+define i32 @constraint_z() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_z:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ;
+
+; z with 0
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},$0
+;CHECK_LITTLE_32: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+ ret i32 0
+}
+
+; a long long in 32 bit mode (used to assert)
+define i32 @constraint_longlong() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_longlong:
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK_LITTLE_32: #NO_APP
+ tail call i64 asm sideeffect "addi $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
+ ret i32 0
+}
+
+; D, in little endian the source reg will be 4 bytes into the long long
+define i32 @constraint_D() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_D:
+;CHECK_LITTLE_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_LITTLE_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_LITTLE_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+
+; D, in big endian the source reg will also be 4 bytes into the long long
+;CHECK_BIG_32: constraint_D:
+;CHECK_BIG_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_BIG_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_BIG_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_BIG_32: #APP
+;CHECK_BIG_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_BIG_32: #NO_APP
+ %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+ %trunc1 = trunc i64 %bosco to i32
+ tail call i32 asm sideeffect "or $0,${1:D},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+ ret i32 0
+}
+
+; L, in little endian the source reg will be 0 bytes into the long long
+define i32 @constraint_L() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_L:
+;CHECK_LITTLE_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_LITTLE_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_LITTLE_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+; L, in big endian the source reg will be 4 bytes into the long long
+;CHECK_BIG_32: constraint_L:
+;CHECK_BIG_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_BIG_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_BIG_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_BIG_32: #APP
+;CHECK_BIG_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_BIG_32: #NO_APP
+ %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+ %trunc1 = trunc i64 %bosco to i32
+ tail call i32 asm sideeffect "or $0,${1:L},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+ ret i32 0
+}
+
+; M, in little endian the source reg will be 4 bytes into the long long
+define i32 @constraint_M() nounwind {
+entry:
+;CHECK_LITTLE_32: constraint_M:
+;CHECK_LITTLE_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_LITTLE_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_LITTLE_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+; M, in big endian the source reg will be 0 bytes into the long long
+;CHECK_BIG_32: constraint_M:
+;CHECK_BIG_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+;CHECK_BIG_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+;CHECK_BIG_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+;CHECK_BIG_32: #APP
+;CHECK_BIG_32: or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
+;CHECK_BIG_32: #NO_APP
+ %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+ %trunc1 = trunc i64 %bosco to i32
+ tail call i32 asm sideeffect "or $0,${1:M},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+ ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
new file mode 100644
index 000000000000..5adec3bb29ea
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @main() nounwind {
+entry:
+
+; First I with short
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},4096
+; CHECK: #NO_APP
+ tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind
+
+; Then I with int
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind
+
+; Now J with 0
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},0
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
+
+; Now K with 64
+; CHECK: #APP
+; CHECK: addu ${{[0-9]+}},${{[0-9]+}},64
+; CHECK: #NO_APP
+ tail call i16 asm sideeffect "addu $0,$1,$2\0A\09 ", "=r,r,K"(i16 7, i16 64) nounwind
+
+; Now L with 0x00100000
+; CHECK: #APP
+; CHECK: add ${{[0-9]+}},${{[0-9]+}},${{[0-9]+}}
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "add $0,$1,$3\0A\09", "=r,r,L,r"(i32 7, i32 1048576, i32 0) nounwind
+
+; Now N with -3
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind
+
+; Now O with -3
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind
+
+; Now P with 65535
+; CHECK: #APP
+; CHECK: addi ${{[0-9]+}},${{[0-9]+}},65535
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
+
+ ret i32 0
+}
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index 4b31a88b418a..1c7c4437b892 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -11,7 +11,7 @@ entry:
; CHECK: #APP
; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
; CHECK: #NO_APP
-; CHECK: lw $[[T1:[0-9]+]], %got(g1)($gp)
+; CHECK: lw $[[T1:[0-9]+]], %got(g1)
; CHECK: sw $[[T3]], 0($[[T1]])
%l1 = alloca i32, align 4
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
index 434b3868968a..863375ad4d4a 100644
--- a/test/CodeGen/Mips/internalfunc.ll
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -6,7 +6,7 @@
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
-; CHECK: lw $[[R0:[0-9]+]], %got(f2)($gp)
+; CHECK: lw $[[R0:[0-9]+]], %got(f2)
; CHECK: addiu $25, $[[R0]], %lo(f2)
tail call fastcc void @f2()
ret i32 0
@@ -14,7 +14,7 @@ entry:
define void @caller(i32 %a0, i32 %a1) nounwind {
entry:
-; CHECK: lw $[[R1:[0-9]+]], %got(caller.sf1)($gp)
+; CHECK: lw $[[R1:[0-9]+]], %got(caller.sf1)
; CHECK: lw $25, %lo(caller.sf1)($[[R1]])
%tobool = icmp eq i32 %a1, 0
br i1 %tobool, label %if.end, label %if.then
@@ -25,9 +25,9 @@ if.then: ; preds = %entry
br label %if.end
if.end: ; preds = %entry, %if.then
-; CHECK: lw $[[R2:[0-9]+]], %got(sf2)($gp)
+; CHECK: lw $[[R2:[0-9]+]], %got(sf2)
; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2)
-; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp)
+; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)
; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
%tobool3 = icmp ne i32 %a0, 0
%tmp4 = load void (...)** @gf1, align 4
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index b7c9a9ccbb58..2e548790cd39 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -6,10 +6,9 @@
define void @f() nounwind {
entry:
-; CHECK: lui $at, 65534
-; CHECK: addiu $at, $at, -24
+; CHECK: lui $at, 65535
+; CHECK: addiu $at, $at, -16
; CHECK: addu $sp, $sp, $at
-; CHECK: .cprestore 65536
%agg.tmp = alloca %struct.S1, align 1
%tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
diff --git a/test/CodeGen/Mips/lb1.ll b/test/CodeGen/Mips/lb1.ll
new file mode 100644
index 000000000000..aac2767a4e40
--- /dev/null
+++ b/test/CodeGen/Mips/lb1.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@c = global i8 -1, align 1
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %0 = load i8* @c, align 1
+; 16: lb ${{[0-9]+}}, 0(${{[0-9]+}})
+ %conv = sext i8 %0 to i32
+ store i32 %conv, i32* %i, align 4
+ %1 = load i32* %i, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/lbu1.ll b/test/CodeGen/Mips/lbu1.ll
new file mode 100644
index 000000000000..63e0cca1684d
--- /dev/null
+++ b/test/CodeGen/Mips/lbu1.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@c = global i8 97, align 1
+@.str = private unnamed_addr constant [5 x i8] c"%c \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %0 = load i8* @c, align 1
+ %conv = zext i8 %0 to i32
+; 16: lbu ${{[0-9]+}}, 0(${{[0-9]+}})
+ store i32 %conv, i32* %i, align 4
+ %1 = load i8* @c, align 1
+ %conv1 = zext i8 %1 to i32
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %conv1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/lh1.ll b/test/CodeGen/Mips/lh1.ll
new file mode 100644
index 000000000000..1f95b0903466
--- /dev/null
+++ b/test/CodeGen/Mips/lh1.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@s = global i16 -1, align 2
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %0 = load i16* @s, align 2
+ %conv = sext i16 %0 to i32
+; 16: lh ${{[0-9]+}}, 0(${{[0-9]+}})
+ store i32 %conv, i32* %i, align 4
+ %1 = load i32* %i, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/lhu1.ll b/test/CodeGen/Mips/lhu1.ll
new file mode 100644
index 000000000000..0cfcede669e0
--- /dev/null
+++ b/test/CodeGen/Mips/lhu1.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+
+@s = global i16 255, align 2
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %0 = load i16* @s, align 2
+ %conv = zext i16 %0 to i32
+; 16: lhu ${{[0-9]+}}, 0(${{[0-9]+}})
+ store i32 %conv, i32* %i, align 4
+ %1 = load i32* %i, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll
new file mode 100644
index 000000000000..d0928ee26613
--- /dev/null
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=EL %s
+; RUN: llc -march=mips < %s | FileCheck -check-prefix=EB %s
+
+%struct.SI = type { i32 }
+
+@si = common global %struct.SI zeroinitializer, align 1
+
+define i32 @foo_load_i() nounwind readonly {
+entry:
+; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: lwr $[[R0]], 0($[[R1]])
+; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: lwr $[[R0]], 3($[[R1]])
+
+ %0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
+ ret i32 %0
+}
+
+define void @foo_store_i(i32 %a) nounwind {
+entry:
+; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: swr $[[R0]], 0($[[R1]])
+; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: swr $[[R0]], 3($[[R1]])
+
+ store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
new file mode 100644
index 000000000000..0227b88fbc86
--- /dev/null
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -force-mips-long-branch < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch < %s | FileCheck %s -check-prefix=N64
+
+@g0 = external global i32
+
+define void @foo1(i32 %s) nounwind {
+entry:
+; O32: bal
+; N64: bal
+; N64: highest
+; N64: higher
+
+ %tobool = icmp eq i32 %s, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32* @g0, align 4
+ %add = add nsw i32 %0, 12
+ store i32 %add, i32* @g0, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/machineverifier.ll b/test/CodeGen/Mips/machineverifier.ll
new file mode 100644
index 000000000000..c673fe557e6b
--- /dev/null
+++ b/test/CodeGen/Mips/machineverifier.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=mipsel -verify-machineinstrs
+; Make sure machine verifier understands the last instruction of a basic block
+; is not the terminator instruction after delay slot filler pass is run.
+
+@g = external global i32
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @g, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %add = add nsw i32 %0, 10
+ store i32 %add, i32* @g, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/memcpy.ll b/test/CodeGen/Mips/memcpy.ll
new file mode 100644
index 000000000000..39764a936381
--- /dev/null
+++ b/test/CodeGen/Mips/memcpy.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+%struct.S1 = type { i32, [41 x i8] }
+
+@.str = private unnamed_addr constant [31 x i8] c"abcdefghijklmnopqrstuvwxyzABCD\00", align 1
+
+define void @foo1(%struct.S1* %s1, i8 signext %n) nounwind {
+entry:
+; CHECK-NOT: call16(memcpy
+
+ %arraydecay = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 0
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arraydecay, i8* getelementptr inbounds ([31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false)
+ %arrayidx = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 40
+ store i8 %n, i8* %arrayidx, align 1
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
index 09745fb8f61c..bbdc05cd2d8f 100644
--- a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -30,7 +30,7 @@ entry:
define float @foo2(i32 %b, i32 %c) nounwind readonly {
entry:
-; CHECK: luxc1
+; CHECK-NOT: luxc1
%idxprom = zext i32 %c to i64
%idxprom1 = zext i32 %b to i64
%arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
@@ -60,7 +60,7 @@ entry:
define void @foo5(i32 %b, i32 %c) nounwind {
entry:
-; CHECK: suxc1
+; CHECK-NOT: suxc1
%0 = load float* @gf, align 4
%idxprom = zext i32 %c to i64
%idxprom1 = zext i32 %b to i64
@@ -72,7 +72,7 @@ entry:
define double @foo6(i32 %b, i32 %c) nounwind readonly {
entry:
; CHECK: foo6
-; CHECK-NOT: ldxc1
+; CHECK-NOT: luxc1
%idxprom = zext i32 %c to i64
%idxprom1 = zext i32 %b to i64
%arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
@@ -83,7 +83,7 @@ entry:
define void @foo7(i32 %b, i32 %c) nounwind {
entry:
; CHECK: foo7
-; CHECK-NOT: sdxc1
+; CHECK-NOT: suxc1
%0 = load double* @gd, align 8
%idxprom = zext i32 %c to i64
%idxprom1 = zext i32 %b to i64
@@ -95,7 +95,7 @@ entry:
define float @foo8() nounwind readonly {
entry:
; CHECK: foo8
-; CHECK: luxc1
+; CHECK-NOT: luxc1
%0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
ret float %0
}
@@ -103,7 +103,7 @@ entry:
define void @foo9(float %f) nounwind {
entry:
; CHECK: foo9
-; CHECK: suxc1
+; CHECK-NOT: suxc1
store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
ret void
}
diff --git a/test/CodeGen/Mips/mips64load-store-left-right.ll b/test/CodeGen/Mips/mips64load-store-left-right.ll
new file mode 100644
index 000000000000..4561429ad8b9
--- /dev/null
+++ b/test/CodeGen/Mips/mips64load-store-left-right.ll
@@ -0,0 +1,73 @@
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=EL %s
+; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=EB %s
+
+%struct.SLL = type { i64 }
+%struct.SI = type { i32 }
+%struct.SUI = type { i32 }
+
+@sll = common global %struct.SLL zeroinitializer, align 1
+@si = common global %struct.SI zeroinitializer, align 1
+@sui = common global %struct.SUI zeroinitializer, align 1
+
+define i64 @foo_load_ll() nounwind readonly {
+entry:
+; EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; EL: ldr $[[R0]], 0($[[R1]])
+; EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: ldr $[[R0]], 7($[[R1]])
+
+ %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+ ret i64 %0
+}
+
+define i64 @foo_load_i() nounwind readonly {
+entry:
+; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: lwr $[[R0]], 0($[[R1]])
+; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: lwr $[[R0]], 3($[[R1]])
+
+ %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+ %conv = sext i32 %0 to i64
+ ret i64 %conv
+}
+
+define i64 @foo_load_ui() nounwind readonly {
+entry:
+; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: lwr $[[R0]], 0($[[R1]])
+; EL: daddiu $[[R2:[0-9]+]], $zero, 1
+; EL: dsll $[[R3:[0-9]+]], $[[R2]], 32
+; EL: daddiu $[[R4:[0-9]+]], $[[R3]], -1
+; EL: and ${{[0-9]+}}, $[[R0]], $[[R4]]
+; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: lwr $[[R0]], 3($[[R1]])
+
+
+ %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
+ %conv = zext i32 %0 to i64
+ ret i64 %conv
+}
+
+define void @foo_store_ll(i64 %a) nounwind {
+entry:
+; EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; EL: sdr $[[R0]], 0($[[R1]])
+; EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: sdr $[[R0]], 7($[[R1]])
+
+ store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+ ret void
+}
+
+define void @foo_store_i(i32 %a) nounwind {
+entry:
+; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; EL: swr $[[R0]], 0($[[R1]])
+; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; EB: swr $[[R0]], 3($[[R1]])
+
+ store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/neg1.ll b/test/CodeGen/Mips/neg1.ll
new file mode 100644
index 000000000000..281e62621565
--- /dev/null
+++ b/test/CodeGen/Mips/neg1.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %sub = sub nsw i32 0, %0
+; 16: neg ${{[0-9]+}}, ${{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/not1.ll b/test/CodeGen/Mips/not1.ll
new file mode 100644
index 000000000000..2163b236c56f
--- /dev/null
+++ b/test/CodeGen/Mips/not1.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @x, align 4
+ %neg = xor i32 %0, -1
+; 16: not ${{[0-9]+}}, ${{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %neg)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/null.ll b/test/CodeGen/Mips/null.ll
new file mode 100644
index 000000000000..7beae99c4557
--- /dev/null
+++ b/test/CodeGen/Mips/null.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel -mcpu=mips16 < %s | FileCheck %s -check-prefix=16
+
+
+define i32 @main() nounwind {
+entry:
+ ret i32 0
+
+; 16: .set mips16 # @main
+
+
+; 16: jr $ra
+
+}
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index c5cbc7a66b8c..eac0d80c1c57 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -10,19 +10,19 @@
define void @f1() nounwind {
entry:
-; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1)($gp)
+; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1)
; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
; CHECK: lw $[[R6:[0-9]+]], 28($[[R0]])
-; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]])
-; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]])
-; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]])
-; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]])
-; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]])
; CHECK: sw $[[R6]], 36($sp)
+; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]])
; CHECK: sw $[[R5]], 32($sp)
+; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]])
; CHECK: sw $[[R4]], 28($sp)
+; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]])
; CHECK: sw $[[R3]], 24($sp)
+; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]])
; CHECK: sw $[[R7]], 20($sp)
+; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]])
; CHECK: sw $[[R2]], 16($sp)
; CHECK: lw $7, 4($[[R0]])
; CHECK: lw $6, %lo(f1.s1)($[[R1]])
@@ -43,16 +43,16 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {
entry:
-; CHECK: addiu $sp, $sp, -56
-; CHECK: sw $7, 68($sp)
-; CHECK: sw $6, 64($sp)
-; CHECK: lw $4, 88($sp)
-; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp)
-; CHECK: lw $[[R3:[0-9]+]], 72($sp)
-; CHECK: lw $[[R4:[0-9]+]], 76($sp)
-; CHECK: lw $[[R2:[0-9]+]], 68($sp)
-; CHECK: lh $[[R1:[0-9]+]], 66($sp)
-; CHECK: lb $[[R0:[0-9]+]], 64($sp)
+; CHECK: addiu $sp, $sp, -48
+; CHECK: sw $7, 60($sp)
+; CHECK: sw $6, 56($sp)
+; CHECK: lw $4, 80($sp)
+; CHECK: ldc1 $f[[F0:[0-9]+]], 72($sp)
+; CHECK: lw $[[R3:[0-9]+]], 64($sp)
+; CHECK: lw $[[R4:[0-9]+]], 68($sp)
+; CHECK: lw $[[R2:[0-9]+]], 60($sp)
+; CHECK: lh $[[R1:[0-9]+]], 58($sp)
+; CHECK: lb $[[R0:[0-9]+]], 56($sp)
; CHECK: sw $[[R0]], 32($sp)
; CHECK: sw $[[R1]], 28($sp)
; CHECK: sw $[[R2]], 24($sp)
@@ -80,13 +80,13 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
define void @f3(%struct.S2* nocapture byval %s2) nounwind {
entry:
-; CHECK: addiu $sp, $sp, -56
-; CHECK: sw $7, 68($sp)
-; CHECK: sw $6, 64($sp)
-; CHECK: sw $5, 60($sp)
-; CHECK: sw $4, 56($sp)
-; CHECK: lw $4, 56($sp)
-; CHECK: lw $[[R0:[0-9]+]], 68($sp)
+; CHECK: addiu $sp, $sp, -48
+; CHECK: sw $7, 60($sp)
+; CHECK: sw $6, 56($sp)
+; CHECK: sw $5, 52($sp)
+; CHECK: sw $4, 48($sp)
+; CHECK: lw $4, 48($sp)
+; CHECK: lw $[[R0:[0-9]+]], 60($sp)
; CHECK: sw $[[R0]], 24($sp)
%arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
@@ -99,13 +99,13 @@ entry:
define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {
entry:
-; CHECK: addiu $sp, $sp, -56
-; CHECK: sw $7, 68($sp)
-; CHECK: sw $6, 64($sp)
-; CHECK: sw $5, 60($sp)
-; CHECK: lw $4, 68($sp)
-; CHECK: lw $[[R1:[0-9]+]], 88($sp)
-; CHECK: lb $[[R0:[0-9]+]], 60($sp)
+; CHECK: addiu $sp, $sp, -48
+; CHECK: sw $7, 60($sp)
+; CHECK: sw $6, 56($sp)
+; CHECK: sw $5, 52($sp)
+; CHECK: lw $4, 60($sp)
+; CHECK: lw $[[R1:[0-9]+]], 80($sp)
+; CHECK: lb $[[R0:[0-9]+]], 52($sp)
; CHECK: sw $[[R0]], 32($sp)
; CHECK: sw $[[R1]], 24($sp)
diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll
index 4a3d9ab8375c..35332b6550d8 100644
--- a/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -1,6 +1,5 @@
; RUN: llc -march=mipsel -pre-RA-sched=source < %s | FileCheck %s
-
; All test functions do the same thing - they return the first variable
; argument.
diff --git a/test/CodeGen/Mips/or1.ll b/test/CodeGen/Mips/or1.ll
new file mode 100644
index 000000000000..b1c36961f92b
--- /dev/null
+++ b/test/CodeGen/Mips/or1.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @x, align 4
+ %1 = load i32* @y, align 4
+ %or = or i32 %0, %1
+; 16: or ${{[0-9]+}}, ${{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %or)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/ra-allocatable.ll b/test/CodeGen/Mips/ra-allocatable.ll
new file mode 100644
index 000000000000..76217886829a
--- /dev/null
+++ b/test/CodeGen/Mips/ra-allocatable.ll
@@ -0,0 +1,288 @@
+; RUN: llc < %s -march=mipsel | FileCheck %s
+
+@a0 = external global i32
+@b0 = external global i32*
+@a1 = external global i32
+@b1 = external global i32*
+@a2 = external global i32
+@b2 = external global i32*
+@a3 = external global i32
+@b3 = external global i32*
+@a4 = external global i32
+@b4 = external global i32*
+@a5 = external global i32
+@b5 = external global i32*
+@a6 = external global i32
+@b6 = external global i32*
+@a7 = external global i32
+@b7 = external global i32*
+@a8 = external global i32
+@b8 = external global i32*
+@a9 = external global i32
+@b9 = external global i32*
+@a10 = external global i32
+@b10 = external global i32*
+@a11 = external global i32
+@b11 = external global i32*
+@a12 = external global i32
+@b12 = external global i32*
+@a13 = external global i32
+@b13 = external global i32*
+@a14 = external global i32
+@b14 = external global i32*
+@a15 = external global i32
+@b15 = external global i32*
+@a16 = external global i32
+@b16 = external global i32*
+@a17 = external global i32
+@b17 = external global i32*
+@a18 = external global i32
+@b18 = external global i32*
+@a19 = external global i32
+@b19 = external global i32*
+@a20 = external global i32
+@b20 = external global i32*
+@a21 = external global i32
+@b21 = external global i32*
+@a22 = external global i32
+@b22 = external global i32*
+@a23 = external global i32
+@b23 = external global i32*
+@a24 = external global i32
+@b24 = external global i32*
+@a25 = external global i32
+@b25 = external global i32*
+@a26 = external global i32
+@b26 = external global i32*
+@a27 = external global i32
+@b27 = external global i32*
+@a28 = external global i32
+@b28 = external global i32*
+@a29 = external global i32
+@b29 = external global i32*
+@c0 = external global i32*
+@c1 = external global i32*
+@c2 = external global i32*
+@c3 = external global i32*
+@c4 = external global i32*
+@c5 = external global i32*
+@c6 = external global i32*
+@c7 = external global i32*
+@c8 = external global i32*
+@c9 = external global i32*
+@c10 = external global i32*
+@c11 = external global i32*
+@c12 = external global i32*
+@c13 = external global i32*
+@c14 = external global i32*
+@c15 = external global i32*
+@c16 = external global i32*
+@c17 = external global i32*
+@c18 = external global i32*
+@c19 = external global i32*
+@c20 = external global i32*
+@c21 = external global i32*
+@c22 = external global i32*
+@c23 = external global i32*
+@c24 = external global i32*
+@c25 = external global i32*
+@c26 = external global i32*
+@c27 = external global i32*
+@c28 = external global i32*
+@c29 = external global i32*
+
+define i32 @f1() nounwind {
+entry:
+; CHECK: sw $ra, {{[0-9]+}}($sp) # 4-byte Folded Spill
+; CHECK: $ra
+; CHECK: lw $ra, {{[0-9]+}}($sp) # 4-byte Folded Reload
+; CHECK: jr $ra
+
+ %0 = load i32* @a0, align 4, !tbaa !0
+ %1 = load i32** @b0, align 4, !tbaa !3
+ store i32 %0, i32* %1, align 4, !tbaa !0
+ %2 = load i32* @a1, align 4, !tbaa !0
+ %3 = load i32** @b1, align 4, !tbaa !3
+ store i32 %2, i32* %3, align 4, !tbaa !0
+ %4 = load i32* @a2, align 4, !tbaa !0
+ %5 = load i32** @b2, align 4, !tbaa !3
+ store i32 %4, i32* %5, align 4, !tbaa !0
+ %6 = load i32* @a3, align 4, !tbaa !0
+ %7 = load i32** @b3, align 4, !tbaa !3
+ store i32 %6, i32* %7, align 4, !tbaa !0
+ %8 = load i32* @a4, align 4, !tbaa !0
+ %9 = load i32** @b4, align 4, !tbaa !3
+ store i32 %8, i32* %9, align 4, !tbaa !0
+ %10 = load i32* @a5, align 4, !tbaa !0
+ %11 = load i32** @b5, align 4, !tbaa !3
+ store i32 %10, i32* %11, align 4, !tbaa !0
+ %12 = load i32* @a6, align 4, !tbaa !0
+ %13 = load i32** @b6, align 4, !tbaa !3
+ store i32 %12, i32* %13, align 4, !tbaa !0
+ %14 = load i32* @a7, align 4, !tbaa !0
+ %15 = load i32** @b7, align 4, !tbaa !3
+ store i32 %14, i32* %15, align 4, !tbaa !0
+ %16 = load i32* @a8, align 4, !tbaa !0
+ %17 = load i32** @b8, align 4, !tbaa !3
+ store i32 %16, i32* %17, align 4, !tbaa !0
+ %18 = load i32* @a9, align 4, !tbaa !0
+ %19 = load i32** @b9, align 4, !tbaa !3
+ store i32 %18, i32* %19, align 4, !tbaa !0
+ %20 = load i32* @a10, align 4, !tbaa !0
+ %21 = load i32** @b10, align 4, !tbaa !3
+ store i32 %20, i32* %21, align 4, !tbaa !0
+ %22 = load i32* @a11, align 4, !tbaa !0
+ %23 = load i32** @b11, align 4, !tbaa !3
+ store i32 %22, i32* %23, align 4, !tbaa !0
+ %24 = load i32* @a12, align 4, !tbaa !0
+ %25 = load i32** @b12, align 4, !tbaa !3
+ store i32 %24, i32* %25, align 4, !tbaa !0
+ %26 = load i32* @a13, align 4, !tbaa !0
+ %27 = load i32** @b13, align 4, !tbaa !3
+ store i32 %26, i32* %27, align 4, !tbaa !0
+ %28 = load i32* @a14, align 4, !tbaa !0
+ %29 = load i32** @b14, align 4, !tbaa !3
+ store i32 %28, i32* %29, align 4, !tbaa !0
+ %30 = load i32* @a15, align 4, !tbaa !0
+ %31 = load i32** @b15, align 4, !tbaa !3
+ store i32 %30, i32* %31, align 4, !tbaa !0
+ %32 = load i32* @a16, align 4, !tbaa !0
+ %33 = load i32** @b16, align 4, !tbaa !3
+ store i32 %32, i32* %33, align 4, !tbaa !0
+ %34 = load i32* @a17, align 4, !tbaa !0
+ %35 = load i32** @b17, align 4, !tbaa !3
+ store i32 %34, i32* %35, align 4, !tbaa !0
+ %36 = load i32* @a18, align 4, !tbaa !0
+ %37 = load i32** @b18, align 4, !tbaa !3
+ store i32 %36, i32* %37, align 4, !tbaa !0
+ %38 = load i32* @a19, align 4, !tbaa !0
+ %39 = load i32** @b19, align 4, !tbaa !3
+ store i32 %38, i32* %39, align 4, !tbaa !0
+ %40 = load i32* @a20, align 4, !tbaa !0
+ %41 = load i32** @b20, align 4, !tbaa !3
+ store i32 %40, i32* %41, align 4, !tbaa !0
+ %42 = load i32* @a21, align 4, !tbaa !0
+ %43 = load i32** @b21, align 4, !tbaa !3
+ store i32 %42, i32* %43, align 4, !tbaa !0
+ %44 = load i32* @a22, align 4, !tbaa !0
+ %45 = load i32** @b22, align 4, !tbaa !3
+ store i32 %44, i32* %45, align 4, !tbaa !0
+ %46 = load i32* @a23, align 4, !tbaa !0
+ %47 = load i32** @b23, align 4, !tbaa !3
+ store i32 %46, i32* %47, align 4, !tbaa !0
+ %48 = load i32* @a24, align 4, !tbaa !0
+ %49 = load i32** @b24, align 4, !tbaa !3
+ store i32 %48, i32* %49, align 4, !tbaa !0
+ %50 = load i32* @a25, align 4, !tbaa !0
+ %51 = load i32** @b25, align 4, !tbaa !3
+ store i32 %50, i32* %51, align 4, !tbaa !0
+ %52 = load i32* @a26, align 4, !tbaa !0
+ %53 = load i32** @b26, align 4, !tbaa !3
+ store i32 %52, i32* %53, align 4, !tbaa !0
+ %54 = load i32* @a27, align 4, !tbaa !0
+ %55 = load i32** @b27, align 4, !tbaa !3
+ store i32 %54, i32* %55, align 4, !tbaa !0
+ %56 = load i32* @a28, align 4, !tbaa !0
+ %57 = load i32** @b28, align 4, !tbaa !3
+ store i32 %56, i32* %57, align 4, !tbaa !0
+ %58 = load i32* @a29, align 4, !tbaa !0
+ %59 = load i32** @b29, align 4, !tbaa !3
+ store i32 %58, i32* %59, align 4, !tbaa !0
+ %60 = load i32* @a0, align 4, !tbaa !0
+ %61 = load i32** @c0, align 4, !tbaa !3
+ store i32 %60, i32* %61, align 4, !tbaa !0
+ %62 = load i32* @a1, align 4, !tbaa !0
+ %63 = load i32** @c1, align 4, !tbaa !3
+ store i32 %62, i32* %63, align 4, !tbaa !0
+ %64 = load i32* @a2, align 4, !tbaa !0
+ %65 = load i32** @c2, align 4, !tbaa !3
+ store i32 %64, i32* %65, align 4, !tbaa !0
+ %66 = load i32* @a3, align 4, !tbaa !0
+ %67 = load i32** @c3, align 4, !tbaa !3
+ store i32 %66, i32* %67, align 4, !tbaa !0
+ %68 = load i32* @a4, align 4, !tbaa !0
+ %69 = load i32** @c4, align 4, !tbaa !3
+ store i32 %68, i32* %69, align 4, !tbaa !0
+ %70 = load i32* @a5, align 4, !tbaa !0
+ %71 = load i32** @c5, align 4, !tbaa !3
+ store i32 %70, i32* %71, align 4, !tbaa !0
+ %72 = load i32* @a6, align 4, !tbaa !0
+ %73 = load i32** @c6, align 4, !tbaa !3
+ store i32 %72, i32* %73, align 4, !tbaa !0
+ %74 = load i32* @a7, align 4, !tbaa !0
+ %75 = load i32** @c7, align 4, !tbaa !3
+ store i32 %74, i32* %75, align 4, !tbaa !0
+ %76 = load i32* @a8, align 4, !tbaa !0
+ %77 = load i32** @c8, align 4, !tbaa !3
+ store i32 %76, i32* %77, align 4, !tbaa !0
+ %78 = load i32* @a9, align 4, !tbaa !0
+ %79 = load i32** @c9, align 4, !tbaa !3
+ store i32 %78, i32* %79, align 4, !tbaa !0
+ %80 = load i32* @a10, align 4, !tbaa !0
+ %81 = load i32** @c10, align 4, !tbaa !3
+ store i32 %80, i32* %81, align 4, !tbaa !0
+ %82 = load i32* @a11, align 4, !tbaa !0
+ %83 = load i32** @c11, align 4, !tbaa !3
+ store i32 %82, i32* %83, align 4, !tbaa !0
+ %84 = load i32* @a12, align 4, !tbaa !0
+ %85 = load i32** @c12, align 4, !tbaa !3
+ store i32 %84, i32* %85, align 4, !tbaa !0
+ %86 = load i32* @a13, align 4, !tbaa !0
+ %87 = load i32** @c13, align 4, !tbaa !3
+ store i32 %86, i32* %87, align 4, !tbaa !0
+ %88 = load i32* @a14, align 4, !tbaa !0
+ %89 = load i32** @c14, align 4, !tbaa !3
+ store i32 %88, i32* %89, align 4, !tbaa !0
+ %90 = load i32* @a15, align 4, !tbaa !0
+ %91 = load i32** @c15, align 4, !tbaa !3
+ store i32 %90, i32* %91, align 4, !tbaa !0
+ %92 = load i32* @a16, align 4, !tbaa !0
+ %93 = load i32** @c16, align 4, !tbaa !3
+ store i32 %92, i32* %93, align 4, !tbaa !0
+ %94 = load i32* @a17, align 4, !tbaa !0
+ %95 = load i32** @c17, align 4, !tbaa !3
+ store i32 %94, i32* %95, align 4, !tbaa !0
+ %96 = load i32* @a18, align 4, !tbaa !0
+ %97 = load i32** @c18, align 4, !tbaa !3
+ store i32 %96, i32* %97, align 4, !tbaa !0
+ %98 = load i32* @a19, align 4, !tbaa !0
+ %99 = load i32** @c19, align 4, !tbaa !3
+ store i32 %98, i32* %99, align 4, !tbaa !0
+ %100 = load i32* @a20, align 4, !tbaa !0
+ %101 = load i32** @c20, align 4, !tbaa !3
+ store i32 %100, i32* %101, align 4, !tbaa !0
+ %102 = load i32* @a21, align 4, !tbaa !0
+ %103 = load i32** @c21, align 4, !tbaa !3
+ store i32 %102, i32* %103, align 4, !tbaa !0
+ %104 = load i32* @a22, align 4, !tbaa !0
+ %105 = load i32** @c22, align 4, !tbaa !3
+ store i32 %104, i32* %105, align 4, !tbaa !0
+ %106 = load i32* @a23, align 4, !tbaa !0
+ %107 = load i32** @c23, align 4, !tbaa !3
+ store i32 %106, i32* %107, align 4, !tbaa !0
+ %108 = load i32* @a24, align 4, !tbaa !0
+ %109 = load i32** @c24, align 4, !tbaa !3
+ store i32 %108, i32* %109, align 4, !tbaa !0
+ %110 = load i32* @a25, align 4, !tbaa !0
+ %111 = load i32** @c25, align 4, !tbaa !3
+ store i32 %110, i32* %111, align 4, !tbaa !0
+ %112 = load i32* @a26, align 4, !tbaa !0
+ %113 = load i32** @c26, align 4, !tbaa !3
+ store i32 %112, i32* %113, align 4, !tbaa !0
+ %114 = load i32* @a27, align 4, !tbaa !0
+ %115 = load i32** @c27, align 4, !tbaa !3
+ store i32 %114, i32* %115, align 4, !tbaa !0
+ %116 = load i32* @a28, align 4, !tbaa !0
+ %117 = load i32** @c28, align 4, !tbaa !3
+ store i32 %116, i32* %117, align 4, !tbaa !0
+ %118 = load i32* @a29, align 4, !tbaa !0
+ %119 = load i32** @c29, align 4, !tbaa !3
+ store i32 %118, i32* %119, align 4, !tbaa !0
+ %120 = load i32* @a0, align 4, !tbaa !0
+ ret i32 %120
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/Mips/rdhwr-directives.ll b/test/CodeGen/Mips/rdhwr-directives.ll
new file mode 100644
index 000000000000..27010d4d3699
--- /dev/null
+++ b/test/CodeGen/Mips/rdhwr-directives.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static < %s | FileCheck %s
+
+@a = external thread_local global i32
+
+define i32 @foo() nounwind readonly {
+entry:
+; CHECK: .set push
+; CHECK: .set mips32r2
+; CHECK: rdhwr
+; CHECK: .set pop
+
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
diff --git a/test/CodeGen/Mips/return_address.ll b/test/CodeGen/Mips/return_address.ll
new file mode 100644
index 000000000000..e1c9241984ca
--- /dev/null
+++ b/test/CodeGen/Mips/return_address.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i8* @f1() nounwind {
+entry:
+ %0 = call i8* @llvm.returnaddress(i32 0)
+ ret i8* %0
+
+; CHECK: addu $2, $zero, $ra
+}
+
+define i8* @f2() nounwind {
+entry:
+ call void @g()
+ %0 = call i8* @llvm.returnaddress(i32 0)
+ ret i8* %0
+
+; CHECK: addu $[[R0:[0-9]+]], $zero, $ra
+; CHECK: jal
+; CHECK: addu $2, $zero, $[[R0]]
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+declare void @g()
diff --git a/test/CodeGen/Mips/sb1.ll b/test/CodeGen/Mips/sb1.ll
new file mode 100644
index 000000000000..e1a28d459548
--- /dev/null
+++ b/test/CodeGen/Mips/sb1.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 97, align 4
+@c = common global i8 0, align 1
+@.str = private unnamed_addr constant [8 x i8] c"%i %c \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %conv = trunc i32 %0 to i8
+ store i8 %conv, i8* @c, align 1
+ %1 = load i32* @i, align 4
+ %2 = load i8* @c, align 1
+ %conv1 = sext i8 %2 to i32
+; 16: sb ${{[0-9]+}}, 0(${{[0-9]+}})
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/selectcc.ll b/test/CodeGen/Mips/selectcc.ll
new file mode 100644
index 000000000000..a17517e7d145
--- /dev/null
+++ b/test/CodeGen/Mips/selectcc.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel < %s
+
+@gf0 = external global float
+@gf1 = external global float
+@gd0 = external global double
+@gd1 = external global double
+
+define float @select_cc_f32(float %a, float %b) nounwind {
+entry:
+ store float 0.000000e+00, float* @gf0, align 4
+ store float 1.000000e+00, float* @gf1, align 4
+ %cmp = fcmp olt float %a, %b
+ %conv = zext i1 %cmp to i32
+ %conv1 = sitofp i32 %conv to float
+ ret float %conv1
+}
+
+define double @select_cc_f64(double %a, double %b) nounwind {
+entry:
+ store double 0.000000e+00, double* @gd0, align 8
+ store double 1.000000e+00, double* @gd1, align 8
+ %cmp = fcmp olt double %a, %b
+ %conv = zext i1 %cmp to i32
+ %conv1 = sitofp i32 %conv to double
+ ret double %conv1
+}
+
diff --git a/test/CodeGen/Mips/sh1.ll b/test/CodeGen/Mips/sh1.ll
new file mode 100644
index 000000000000..1746ae284f2a
--- /dev/null
+++ b/test/CodeGen/Mips/sh1.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 97, align 4
+@s = common global i16 0, align 2
+@.str = private unnamed_addr constant [9 x i8] c"%i %hi \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %conv = trunc i32 %0 to i16
+ store i16 %conv, i16* @s, align 2
+ %1 = load i32* @i, align 4
+ %2 = load i16* @s, align 2
+ %conv1 = sext i16 %2 to i32
+; 16: sh ${{[0-9]+}}, 0(${{[0-9]+}})
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/shift-parts.ll b/test/CodeGen/Mips/shift-parts.ll
new file mode 100644
index 000000000000..38cbf28108ca
--- /dev/null
+++ b/test/CodeGen/Mips/shift-parts.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i64 @shl0(i64 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: shl0
+; CHECK-NOT: lw $25, %call16(__
+ %sh_prom = zext i32 %b to i64
+ %shl = shl i64 %a, %sh_prom
+ ret i64 %shl
+}
+
+define i64 @shr1(i64 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: shr1
+; CHECK-NOT: lw $25, %call16(__
+ %sh_prom = zext i32 %b to i64
+ %shr = lshr i64 %a, %sh_prom
+ ret i64 %shr
+}
+
+define i64 @sra2(i64 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: sra2
+; CHECK-NOT: lw $25, %call16(__
+ %sh_prom = zext i32 %b to i64
+ %shr = ashr i64 %a, %sh_prom
+ ret i64 %shr
+}
+
diff --git a/test/CodeGen/Mips/sitofp-selectcc-opt.ll b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
new file mode 100644
index 000000000000..576cbd8e9637
--- /dev/null
+++ b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@foo12.d4 = internal unnamed_addr global double 0.000000e+00, align 8
+
+define double @foo12(i32 %a, i32, i64 %b) nounwind {
+entry:
+; check that this transformation doesn't happen:
+; (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+;
+; CHECK-NOT: # double -1.000000e+00
+
+ %tobool1 = icmp ne i32 %a, 0
+ %not.tobool = icmp ne i64 %b, 0
+ %tobool1. = or i1 %tobool1, %not.tobool
+ %lor.ext = zext i1 %tobool1. to i32
+ %conv = sitofp i32 %lor.ext to double
+ %1 = load double* @foo12.d4, align 8
+ %add = fadd double %conv, %1
+ store double %add, double* @foo12.d4, align 8
+ ret double %add
+}
+
diff --git a/test/CodeGen/Mips/sll1.ll b/test/CodeGen/Mips/sll1.ll
new file mode 100644
index 000000000000..fdcd38c84b3a
--- /dev/null
+++ b/test/CodeGen/Mips/sll1.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@j = global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+; 16: sll ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+ %0 = load i32* @i, align 4
+ %shl = shl i32 %0, 4
+; 16: sll ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+ store i32 %shl, i32* @j, align 4
+ %1 = load i32* @j, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sll2.ll b/test/CodeGen/Mips/sll2.ll
new file mode 100644
index 000000000000..c2af454cc853
--- /dev/null
+++ b/test/CodeGen/Mips/sll2.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@j = global i32 4, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %shl = shl i32 %0, %1
+; 16: sllv ${{[0-9]+}}, ${{[0-9]+}}
+ store i32 %shl, i32* @i, align 4
+ %2 = load i32* @j, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sra1.ll b/test/CodeGen/Mips/sra1.ll
new file mode 100644
index 000000000000..15bf8d644ea3
--- /dev/null
+++ b/test/CodeGen/Mips/sra1.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 -354, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %shr = ashr i32 %0, 3
+; 16: sra ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sra2.ll b/test/CodeGen/Mips/sra2.ll
new file mode 100644
index 000000000000..26bf19d44020
--- /dev/null
+++ b/test/CodeGen/Mips/sra2.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 -354, align 4
+@j = global i32 3, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %shr = ashr i32 %0, %1
+; 16: srav ${{[0-9]+}}, ${{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/srl1.ll b/test/CodeGen/Mips/srl1.ll
new file mode 100644
index 000000000000..3474283faef9
--- /dev/null
+++ b/test/CodeGen/Mips/srl1.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10654, align 4
+@j = global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %shr = lshr i32 %0, 4
+; 16: srl ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+ store i32 %shr, i32* @j, align 4
+ %1 = load i32* @j, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/srl2.ll b/test/CodeGen/Mips/srl2.ll
new file mode 100644
index 000000000000..26ec0927a559
--- /dev/null
+++ b/test/CodeGen/Mips/srl2.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10654, align 4
+@j = global i32 0, align 4
+@k = global i32 4, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @k, align 4
+ %shr = lshr i32 %0, %1
+; 16: srlv ${{[0-9]+}}, ${{[0-9]+}}
+ store i32 %shr, i32* @j, align 4
+ %2 = load i32* @j, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/stacksize.ll b/test/CodeGen/Mips/stacksize.ll
new file mode 100644
index 000000000000..42021b215186
--- /dev/null
+++ b/test/CodeGen/Mips/stacksize.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s
+
+define i32 @foo(i32 %a) nounwind readnone {
+entry:
+; check that stack size is zero.
+; CHECK-NOT: addiu $sp, $sp
+ %add = add nsw i32 %a, 1
+ ret i32 %add
+}
diff --git a/test/CodeGen/Mips/sub1.ll b/test/CodeGen/Mips/sub1.ll
new file mode 100644
index 000000000000..195750b805d6
--- /dev/null
+++ b/test/CodeGen/Mips/sub1.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %sub = sub nsw i32 %0, 5
+; 16: addiu ${{[0-9]+}}, -{{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/sub2.ll b/test/CodeGen/Mips/sub2.ll
new file mode 100644
index 000000000000..4f6bfccec401
--- /dev/null
+++ b/test/CodeGen/Mips/sub2.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 10, align 4
+@j = global i32 20, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @i, align 4
+ %sub = sub nsw i32 %0, %1
+; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
index da1e036eb997..9f91a3902d7d 100644
--- a/test/CodeGen/Mips/swzero.ll
+++ b/test/CodeGen/Mips/swzero.ll
@@ -4,7 +4,8 @@
define void @zero_u(%struct.unaligned* nocapture %p) nounwind {
entry:
-; CHECK: usw $zero
+; CHECK: swl $zero
+; CHECK: swr $zero
%x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
store i32 0, i32* %x, align 1
ret void
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
new file mode 100644
index 000000000000..d681091f4c14
--- /dev/null
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s
+
+@foo = thread_local global i32 42
+@bar = hidden alias i32* @foo
+
+define i32* @zed() {
+; CHECK: __tls_get_addr
+; CHECK-NEXT: %tlsgd(bar)
+ ret i32* @bar
+}
diff --git a/test/CodeGen/Mips/tls-models.ll b/test/CodeGen/Mips/tls-models.ll
new file mode 100644
index 000000000000..8f5789ec7995
--- /dev/null
+++ b/test/CodeGen/Mips/tls-models.ll
@@ -0,0 +1,113 @@
+; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck -check-prefix=CHECK-NONPIC %s
+
+@external_gd = external thread_local global i32
+@internal_gd = internal thread_local global i32 42
+
+@external_ld = external thread_local(localdynamic) global i32
+@internal_ld = internal thread_local(localdynamic) global i32 42
+
+@external_ie = external thread_local(initialexec) global i32
+@internal_ie = internal thread_local(initialexec) global i32 42
+
+@external_le = external thread_local(localexec) global i32
+@internal_le = internal thread_local(localexec) global i32 42
+
+; ----- no model specified -----
+
+define i32* @f1() {
+entry:
+ ret i32* @external_gd
+
+ ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
+ ; CHECK-NONPIC: f1:
+ ; CHECK-NONPIC: %gottprel
+ ; CHECK-PIC: f1:
+ ; CHECK-PIC: %tlsgd
+}
+
+define i32* @f2() {
+entry:
+ ret i32* @internal_gd
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic.
+ ; CHECK-NONPIC: f2:
+ ; CHECK-NONPIC: %tprel_hi
+ ; CHECK-PIC: f2:
+ ; CHECK-PIC: %tlsldm
+}
+
+
+; ----- localdynamic specified -----
+
+define i32* @f3() {
+entry:
+ ret i32* @external_ld
+
+ ; Non-PIC code can use initial exec, PIC should use local dynamic.
+ ; CHECK-NONPIC: f3:
+ ; CHECK-NONPIC: %gottprel
+ ; CHECK-PIC: f3:
+ ; CHECK-PIC: %tlsldm
+}
+
+define i32* @f4() {
+entry:
+ ret i32* @internal_ld
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic.
+ ; CHECK-NONPIC: f4:
+ ; CHECK-NONPIC: %tprel_hi
+ ; CHECK-PIC: f4:
+ ; CHECK-PIC: %tlsldm
+}
+
+
+; ----- initialexec specified -----
+
+define i32* @f5() {
+entry:
+ ret i32* @external_ie
+
+ ; Non-PIC and PIC code will use initial exec as specified.
+ ; CHECK-NONPIC: f5:
+ ; CHECK-NONPIC: %gottprel
+ ; CHECK-PIC: f5:
+ ; CHECK-PIC: %gottprel
+}
+
+define i32* @f6() {
+entry:
+ ret i32* @internal_ie
+
+ ; Non-PIC code can use local exec, PIC code uses initial exec as specified.
+ ; CHECK-NONPIC: f6:
+ ; CHECK-NONPIC: %tprel_hi
+ ; CHECK-PIC: f6:
+ ; CHECK-PIC: %gottprel
+}
+
+
+; ----- localexec specified -----
+
+define i32* @f7() {
+entry:
+ ret i32* @external_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; CHECK-NONPIC: f7:
+ ; CHECK-NONPIC: %tprel_hi
+ ; CHECK-PIC: f7:
+ ; CHECK-PIC: %tprel_hi
+}
+
+define i32* @f8() {
+entry:
+ ret i32* @internal_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; CHECK-NONPIC: f8:
+ ; CHECK-NONPIC: %tprel_hi
+ ; CHECK-PIC: f8:
+ ; CHECK-PIC: %tprel_hi
+}
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index a3c4768bb4b5..a7ddb96e4338 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -13,8 +13,9 @@ entry:
; CHECK: f1:
-; PIC: lw $25, %call16(__tls_get_addr)($gp)
-; PIC: addiu $4, $gp, %tlsgd(t1)
+; PIC: addu $[[R0:[a-z0-9]+]], $2, $25
+; PIC: lw $25, %call16(__tls_get_addr)($[[R0]])
+; PIC: addiu $4, $[[R0]], %tlsgd(t1)
; PIC: jalr $25
; PIC: lw $2, 0($2)
@@ -35,18 +36,19 @@ entry:
; CHECK: f2:
-; PIC: lw $25, %call16(__tls_get_addr)($gp)
-; PIC: addiu $4, $gp, %tlsgd(t2)
+; PIC: addu $[[R0:[a-z0-9]+]], $2, $25
+; PIC: lw $25, %call16(__tls_get_addr)($[[R0]])
+; PIC: addiu $4, $[[R0]], %tlsgd(t2)
; PIC: jalr $25
; PIC: lw $2, 0($2)
; STATICGP: lui $[[R0:[0-9]+]], %hi(__gnu_local_gp)
; STATICGP: addiu $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
; STATICGP: lw ${{[0-9]+}}, %gottprel(t2)($[[GP]])
-; STATIC: lui $gp, %hi(__gnu_local_gp)
-; STATIC: addiu $gp, $gp, %lo(__gnu_local_gp)
+; STATIC: lui $[[R0:[0-9]+]], %hi(__gnu_local_gp)
+; STATIC: addiu $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
; STATIC: rdhwr $3, $29
-; STATIC: lw $[[R0:[0-9]+]], %gottprel(t2)($gp)
+; STATIC: lw $[[R0:[0-9]+]], %gottprel(t2)($[[GP]])
; STATIC: addu $[[R1:[0-9]+]], $3, $[[R0]]
; STATIC: lw $2, 0($[[R1]])
}
@@ -57,7 +59,7 @@ define i32 @f3() nounwind {
entry:
; CHECK: f3:
-; PIC: addiu $4, $gp, %tlsldm(f3.i)
+; PIC: addiu $4, ${{[a-z0-9]+}}, %tlsldm(f3.i)
; PIC: jalr $25
; PIC: lui $[[R0:[0-9]+]], %dtprel_hi(f3.i)
; PIC: addu $[[R1:[0-9]+]], $[[R0]], $2
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 6a087ba46e64..7f880b6fe379 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -9,27 +9,17 @@
define void @foo1() nounwind {
entry:
-; CHECK-EL: ulhu $4, 2
-; CHECK-EL: lw $25, %call16(foo2)
-; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4)
-; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
-; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
-; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]])
-; CHECK-EL: or $5, $[[R2]], $[[R3]]
-; CHECK-EL: ulw $4, 0($[[R0]])
-; CHECK-EL: lw $25, %call16(foo4)
+; CHECK-EL: lbu ${{[0-9]+}}, 2($[[R0:[0-9]+]])
+; CHECK-EL: lbu ${{[0-9]+}}, 3($[[R0]])
+; CHECK-EL: jalr
+; CHECK-EL: lwl $[[R1:[0-9]+]], 3($[[R2:[0-9]+]])
+; CHECK-EL: lwr $[[R1]], 0($[[R2]])
-; CHECK-EB: ulhu $[[R0:[0-9]+]], 2
-; CHECK-EB: sll $4, $[[R0]], 16
-; CHECK-EB: lw $25, %call16(foo2)
-; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4)
-; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
-; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
-; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]])
-; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
-; CHECK-EB: or $5, $[[R4]], $[[R5]]
-; CHECK-EB: ulw $4, 0($[[R1]])
-; CHECK-EB: lw $25, %call16(foo4)
+; CHECK-EB: lbu ${{[0-9]+}}, 3($[[R0:[0-9]+]])
+; CHECK-EB: lbu ${{[0-9]+}}, 2($[[R0]])
+; CHECK-EB: jalr
+; CHECK-EB: lwl $[[R1:[0-9]+]], 0($[[R2:[0-9]+]])
+; CHECK-EB: lwr $[[R1]], 3($[[R2]])
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
tail call void @foo4(%struct.S4* byval @s4) nounwind
diff --git a/test/CodeGen/Mips/xor1.ll b/test/CodeGen/Mips/xor1.ll
new file mode 100644
index 000000000000..f2c13169cf7a
--- /dev/null
+++ b/test/CodeGen/Mips/xor1.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @x, align 4
+ %1 = load i32* @y, align 4
+ %xor = xor i32 %0, %1
+; 16: xor ${{[0-9]+}}, ${{[0-9]+}}
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %xor)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index b890e1dba9fc..79ed6091f887 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -4,8 +4,7 @@
define i32 @foo0(i32 %s) nounwind readonly {
entry:
-; CHECK-NOT: addiu
-; CHECK: movn
+; CHECK: movn ${{[0-9]+}}, $zero
%tobool = icmp ne i32 %s, 0
%0 = load i32* @g1, align 4, !tbaa !0
%cond = select i1 %tobool, i32 0, i32 %0
@@ -14,8 +13,7 @@ entry:
define i32 @foo1(i32 %s) nounwind readonly {
entry:
-; CHECK-NOT: addiu
-; CHECK: movz
+; CHECK: movz ${{[0-9]+}}, $zero
%tobool = icmp ne i32 %s, 0
%0 = load i32* @g1, align 4, !tbaa !0
%cond = select i1 %tobool, i32 %0, i32 0
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll
new file mode 100644
index 000000000000..d93f688ef1fd
--- /dev/null
+++ b/test/CodeGen/NVPTX/annotations.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+@texture = internal addrspace(1) global i64 0, align 8
+; CHECK: .global .texref texture
+@surface = internal addrspace(1) global i64 0, align 8
+; CHECK: .global .surfref surface
+
+
+; CHECK: .entry kernel_func_maxntid
+define void @kernel_func_maxntid(float* %a) {
+; CHECK: .maxntid 10, 20, 30
+; CHECK: ret
+ ret void
+}
+
+; CHECK: .entry kernel_func_reqntid
+define void @kernel_func_reqntid(float* %a) {
+; CHECK: .reqntid 11, 22, 33
+; CHECK: ret
+ ret void
+}
+
+; CHECK: .entry kernel_func_minctasm
+define void @kernel_func_minctasm(float* %a) {
+; CHECK: .minnctapersm 42
+; CHECK: ret
+ ret void
+}
+
+
+
+!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
+
+!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
+!2 = metadata !{void (float*)* @kernel_func_maxntid,
+ metadata !"maxntidx", i32 10,
+ metadata !"maxntidy", i32 20,
+ metadata !"maxntidz", i32 30}
+
+!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
+!4 = metadata !{void (float*)* @kernel_func_reqntid,
+ metadata !"reqntidx", i32 11,
+ metadata !"reqntidy", i32 22,
+ metadata !"reqntidz", i32 33}
+
+!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
+!6 = metadata !{void (float*)* @kernel_func_minctasm,
+ metadata !"minctasm", i32 42}
+
+!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
+!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
new file mode 100644
index 000000000000..73c77f56bc9c
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; f64
+
+define double @fadd_f64(double %a, double %b) {
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fadd double %a, %b
+ ret double %ret
+}
+
+define double @fsub_f64(double %a, double %b) {
+; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fsub double %a, %b
+ ret double %ret
+}
+
+define double @fmul_f64(double %a, double %b) {
+; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fmul double %a, %b
+ ret double %ret
+}
+
+define double @fdiv_f64(double %a, double %b) {
+; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fdiv double %a, %b
+ ret double %ret
+}
+
+;; PTX does not have a floating-point rem instruction
+
+
+;;; f32
+
+define float @fadd_f32(float %a, float %b) {
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fadd float %a, %b
+ ret float %ret
+}
+
+define float @fsub_f32(float %a, float %b) {
+; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fsub float %a, %b
+ ret float %ret
+}
+
+define float @fmul_f32(float %a, float %b) {
+; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fmul float %a, %b
+ ret float %ret
+}
+
+define float @fdiv_f32(float %a, float %b) {
+; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fdiv float %a, %b
+ ret float %ret
+}
+
+;; PTX does not have a floating-point rem instruction
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
new file mode 100644
index 000000000000..e474fa4df5ce
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; f64
+
+define double @fadd_f64(double %a, double %b) {
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fadd double %a, %b
+ ret double %ret
+}
+
+define double @fsub_f64(double %a, double %b) {
+; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fsub double %a, %b
+ ret double %ret
+}
+
+define double @fmul_f64(double %a, double %b) {
+; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fmul double %a, %b
+ ret double %ret
+}
+
+define double @fdiv_f64(double %a, double %b) {
+; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
+; CHECK: ret
+ %ret = fdiv double %a, %b
+ ret double %ret
+}
+
+;; PTX does not have a floating-point rem instruction
+
+
+;;; f32
+
+define float @fadd_f32(float %a, float %b) {
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fadd float %a, %b
+ ret float %ret
+}
+
+define float @fsub_f32(float %a, float %b) {
+; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fsub float %a, %b
+ ret float %ret
+}
+
+define float @fmul_f32(float %a, float %b) {
+; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fmul float %a, %b
+ ret float %ret
+}
+
+define float @fdiv_f32(float %a, float %b) {
+; CHECK: div.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
+; CHECK: ret
+ %ret = fdiv float %a, %b
+ ret float %ret
+}
+
+;; PTX does not have a floating-point rem instruction
diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll
new file mode 100644
index 000000000000..529f84900afd
--- /dev/null
+++ b/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -0,0 +1,295 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; i64
+
+define i64 @add_i64(i64 %a, i64 %b) {
+; CHECK: add.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = add i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @sub_i64(i64 %a, i64 %b) {
+; CHECK: sub.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = sub i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @mul_i64(i64 %a, i64 %b) {
+; CHECK: mul.lo.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = mul i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @sdiv_i64(i64 %a, i64 %b) {
+; CHECK: div.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = sdiv i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @udiv_i64(i64 %a, i64 %b) {
+; CHECK: div.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = udiv i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @srem_i64(i64 %a, i64 %b) {
+; CHECK: rem.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = srem i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @urem_i64(i64 %a, i64 %b) {
+; CHECK: rem.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = urem i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @and_i64(i64 %a, i64 %b) {
+; CHECK: and.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = and i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @or_i64(i64 %a, i64 %b) {
+; CHECK: or.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = or i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @xor_i64(i64 %a, i64 %b) {
+; CHECK: xor.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %ret = xor i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @shl_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shl.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = shl i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @ashr_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = ashr i64 %a, %b
+ ret i64 %ret
+}
+
+define i64 @lshr_i64(i64 %a, i64 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = lshr i64 %a, %b
+ ret i64 %ret
+}
+
+
+;;; i32
+
+define i32 @add_i32(i32 %a, i32 %b) {
+; CHECK: add.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = add i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @sub_i32(i32 %a, i32 %b) {
+; CHECK: sub.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = sub i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @mul_i32(i32 %a, i32 %b) {
+; CHECK: mul.lo.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = mul i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @sdiv_i32(i32 %a, i32 %b) {
+; CHECK: div.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = sdiv i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @udiv_i32(i32 %a, i32 %b) {
+; CHECK: div.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = udiv i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @srem_i32(i32 %a, i32 %b) {
+; CHECK: rem.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = srem i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @urem_i32(i32 %a, i32 %b) {
+; CHECK: rem.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = urem i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @and_i32(i32 %a, i32 %b) {
+; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = and i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @or_i32(i32 %a, i32 %b) {
+; CHECK: or.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = or i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @xor_i32(i32 %a, i32 %b) {
+; CHECK: xor.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = xor i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @shl_i32(i32 %a, i32 %b) {
+; CHECK: shl.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = shl i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @ashr_i32(i32 %a, i32 %b) {
+; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = ashr i32 %a, %b
+ ret i32 %ret
+}
+
+define i32 @lshr_i32(i32 %a, i32 %b) {
+; CHECK: shr.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = lshr i32 %a, %b
+ ret i32 %ret
+}
+
+;;; i16
+
+define i16 @add_i16(i16 %a, i16 %b) {
+; CHECK: add.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = add i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @sub_i16(i16 %a, i16 %b) {
+; CHECK: sub.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = sub i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @mul_i16(i16 %a, i16 %b) {
+; CHECK: mul.lo.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = mul i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @sdiv_i16(i16 %a, i16 %b) {
+; CHECK: div.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = sdiv i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @udiv_i16(i16 %a, i16 %b) {
+; CHECK: div.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = udiv i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @srem_i16(i16 %a, i16 %b) {
+; CHECK: rem.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = srem i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @urem_i16(i16 %a, i16 %b) {
+; CHECK: rem.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = urem i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @and_i16(i16 %a, i16 %b) {
+; CHECK: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = and i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @or_i16(i16 %a, i16 %b) {
+; CHECK: or.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = or i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @xor_i16(i16 %a, i16 %b) {
+; CHECK: xor.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %ret = xor i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @shl_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shl.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = shl i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @ashr_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = ashr i16 %a, %b
+ ret i16 %ret
+}
+
+define i16 @lshr_i16(i16 %a, i16 %b) {
+; PTX requires 32-bit shift amount
+; CHECK: shr.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %ret = lshr i16 %a, %b
+ ret i16 %ret
+}
diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll
new file mode 100644
index 000000000000..968203e5f70e
--- /dev/null
+++ b/test/CodeGen/NVPTX/calling-conv.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+;; Kernel function using ptx_kernel calling conv
+
+; CHECK: .entry kernel_func
+define ptx_kernel void @kernel_func(float* %a) {
+; CHECK: ret
+ ret void
+}
+
+;; Device function
+; CHECK: .func device_func
+define void @device_func(float* %a) {
+; CHECK: ret
+ ret void
+}
+
+;; Kernel function using NVVM metadata
+; CHECK: .entry metadata_kernel
+define void @metadata_kernel(float* %a) {
+; CHECK: ret
+ ret void
+}
+
+
+!nvvm.annotations = !{!1}
+
+!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll
new file mode 100644
index 000000000000..12fc7548212c
--- /dev/null
+++ b/test/CodeGen/NVPTX/compare-int.ll
@@ -0,0 +1,389 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+;; These tests should run for all targets
+
+;;===-- Basic instruction selection tests ---------------------------------===;;
+
+
+;;; i64
+
+define i64 @icmp_eq_i64(i64 %a, i64 %b) {
+; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp eq i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_ne_i64(i64 %a, i64 %b) {
+; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ne i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
+; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ugt i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_uge_i64(i64 %a, i64 %b) {
+; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp uge i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_ult_i64(i64 %a, i64 %b) {
+; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ult i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_ule_i64(i64 %a, i64 %b) {
+; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ule i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
+; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sgt i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_sge_i64(i64 %a, i64 %b) {
+; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sge i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_slt_i64(i64 %a, i64 %b) {
+; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp slt i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+define i64 @icmp_sle_i64(i64 %a, i64 %b) {
+; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sle i64 %a, %b
+ %ret = zext i1 %cmp to i64
+ ret i64 %ret
+}
+
+;;; i32
+
+define i32 @icmp_eq_i32(i32 %a, i32 %b) {
+; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp eq i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_ne_i32(i32 %a, i32 %b) {
+; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ne i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
+; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ugt i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_uge_i32(i32 %a, i32 %b) {
+; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp uge i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_ult_i32(i32 %a, i32 %b) {
+; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ult i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_ule_i32(i32 %a, i32 %b) {
+; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ule i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
+; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sgt i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_sge_i32(i32 %a, i32 %b) {
+; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sge i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_slt_i32(i32 %a, i32 %b) {
+; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp slt i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+define i32 @icmp_sle_i32(i32 %a, i32 %b) {
+; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sle i32 %a, %b
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+
+;;; i16
+
+define i16 @icmp_eq_i16(i16 %a, i16 %b) {
+; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp eq i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_ne_i16(i16 %a, i16 %b) {
+; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ne i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
+; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ugt i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_uge_i16(i16 %a, i16 %b) {
+; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp uge i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_ult_i16(i16 %a, i16 %b) {
+; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ult i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_ule_i16(i16 %a, i16 %b) {
+; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ule i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
+; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sgt i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_sge_i16(i16 %a, i16 %b) {
+; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sge i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_slt_i16(i16 %a, i16 %b) {
+; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp slt i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+define i16 @icmp_sle_i16(i16 %a, i16 %b) {
+; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sle i16 %a, %b
+ %ret = zext i1 %cmp to i16
+ ret i16 %ret
+}
+
+
+;;; i8
+
+define i8 @icmp_eq_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp eq i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_ne_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ne i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ugt i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_uge_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp uge i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_ult_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ult i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_ule_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp ule i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sgt i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_sge_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sge i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_slt_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp slt i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
+
+define i8 @icmp_sle_i8(i8 %a, i8 %b) {
+; Comparison happens in 16-bit
+; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
+; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: ret
+ %cmp = icmp sle i8 %a, %b
+ %ret = zext i1 %cmp to i8
+ ret i8 %ret
+}
diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll
new file mode 100644
index 000000000000..21c84379b062
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-fp.ll
@@ -0,0 +1,146 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+define i16 @cvt_i16_f32(float %x) {
+; CHECK: cvt.rzi.u16.f32 %rs{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+ %a = fptoui float %x to i16
+ ret i16 %a
+}
+
+define i16 @cvt_i16_f64(double %x) {
+; CHECK: cvt.rzi.u16.f64 %rs{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+ %a = fptoui double %x to i16
+ ret i16 %a
+}
+
+define i32 @cvt_i32_f32(float %x) {
+; CHECK: cvt.rzi.u32.f32 %r{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+ %a = fptoui float %x to i32
+ ret i32 %a
+}
+
+define i32 @cvt_i32_f64(double %x) {
+; CHECK: cvt.rzi.u32.f64 %r{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+ %a = fptoui double %x to i32
+ ret i32 %a
+}
+
+
+define i64 @cvt_i64_f32(float %x) {
+; CHECK: cvt.rzi.u64.f32 %rl{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+ %a = fptoui float %x to i64
+ ret i64 %a
+}
+
+define i64 @cvt_i64_f64(double %x) {
+; CHECK: cvt.rzi.u64.f64 %rl{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+ %a = fptoui double %x to i64
+ ret i64 %a
+}
+
+define float @cvt_f32_i16(i16 %x) {
+; CHECK: cvt.rn.f32.u16 %f{{[0-9]+}}, %rs{{[0-9]+}};
+; CHECK: ret;
+ %a = uitofp i16 %x to float
+ ret float %a
+}
+
+define float @cvt_f32_i32(i32 %x) {
+; CHECK: cvt.rn.f32.u32 %f{{[0-9]+}}, %r{{[0-9]+}};
+; CHECK: ret;
+ %a = uitofp i32 %x to float
+ ret float %a
+}
+
+define float @cvt_f32_i64(i64 %x) {
+; CHECK: cvt.rn.f32.u64 %f{{[0-9]+}}, %rl{{[0-9]+}};
+; CHECK: ret;
+ %a = uitofp i64 %x to float
+ ret float %a
+}
+
+define float @cvt_f32_f64(double %x) {
+; CHECK: cvt.rn.f32.f64 %f{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+ %a = fptrunc double %x to float
+ ret float %a
+}
+
+define float @cvt_f32_s16(i16 %x) {
+; CHECK: cvt.rn.f32.s16 %f{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %a = sitofp i16 %x to float
+ ret float %a
+}
+
+define float @cvt_f32_s32(i32 %x) {
+; CHECK: cvt.rn.f32.s32 %f{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %a = sitofp i32 %x to float
+ ret float %a
+}
+
+define float @cvt_f32_s64(i64 %x) {
+; CHECK: cvt.rn.f32.s64 %f{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %a = sitofp i64 %x to float
+ ret float %a
+}
+
+define double @cvt_f64_i16(i16 %x) {
+; CHECK: cvt.rn.f64.u16 %fl{{[0-9]+}}, %rs{{[0-9]+}};
+; CHECK: ret;
+ %a = uitofp i16 %x to double
+ ret double %a
+}
+
+define double @cvt_f64_i32(i32 %x) {
+; CHECK: cvt.rn.f64.u32 %fl{{[0-9]+}}, %r{{[0-9]+}};
+; CHECK: ret;
+ %a = uitofp i32 %x to double
+ ret double %a
+}
+
+define double @cvt_f64_i64(i64 %x) {
+; CHECK: cvt.rn.f64.u64 %fl{{[0-9]+}}, %rl{{[0-9]+}};
+; CHECK: ret;
+ %a = uitofp i64 %x to double
+ ret double %a
+}
+
+define double @cvt_f64_f32(float %x) {
+; CHECK: cvt.f64.f32 %fl{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+ %a = fpext float %x to double
+ ret double %a
+}
+
+define double @cvt_f64_s16(i16 %x) {
+; CHECK: cvt.rn.f64.s16 %fl{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %a = sitofp i16 %x to double
+ ret double %a
+}
+
+define double @cvt_f64_s32(i32 %x) {
+; CHECK: cvt.rn.f64.s32 %fl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %a = sitofp i32 %x to double
+ ret double %a
+}
+
+define double @cvt_f64_s64(i64 %x) {
+; CHECK: cvt.rn.f64.s64 %fl{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %a = sitofp i64 %x to double
+ ret double %a
+}
diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll
new file mode 100644
index 000000000000..20716f982e3b
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-int-sm10.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+
+
+; i16
+
+define i16 @cvt_i16_i32(i32 %x) {
+; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %a = trunc i32 %x to i16
+ ret i16 %a
+}
+
+define i16 @cvt_i16_i64(i64 %x) {
+; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %a = trunc i64 %x to i16
+ ret i16 %a
+}
+
+
+
+; i32
+
+define i32 @cvt_i32_i16(i16 %x) {
+; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %a = zext i16 %x to i32
+ ret i32 %a
+}
+
+define i32 @cvt_i32_i64(i64 %x) {
+; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}}
+; CHECK: ret
+ %a = trunc i64 %x to i32
+ ret i32 %a
+}
+
+
+
+; i64
+
+define i64 @cvt_i64_i16(i16 %x) {
+; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: ret
+ %a = zext i16 %x to i64
+ ret i64 %a
+}
+
+define i64 @cvt_i64_i32(i32 %x) {
+; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: ret
+ %a = zext i32 %x to i64
+ ret i64 %a
+}
diff --git a/test/CodeGen/NVPTX/convert-int-sm20.ll b/test/CodeGen/NVPTX/convert-int-sm20.ll
new file mode 100644
index 000000000000..fad240e03d2a
--- /dev/null
+++ b/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+;; Integer conversions happen implicitly by loading/storing the proper types
+
+
+; i16
+
+define i16 @cvt_i16_i32(i32 %x) {
+; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ret
+ %a = trunc i32 %x to i16
+ ret i16 %a
+}
+
+define i16 @cvt_i16_i64(i64 %x) {
+; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ret
+ %a = trunc i64 %x to i16
+ ret i16 %a
+}
+
+
+
+; i32
+
+define i32 @cvt_i32_i16(i16 %x) {
+; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
+; CHECK: ret
+ %a = zext i16 %x to i32
+ ret i32 %a
+}
+
+define i32 @cvt_i32_i64(i64 %x) {
+; CHECK: ld.param.u32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
+; CHECK: ret
+ %a = trunc i64 %x to i32
+ ret i32 %a
+}
+
+
+
+; i64
+
+define i64 @cvt_i64_i16(i16 %x) {
+; CHECK: ld.param.u16 %rl[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
+; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
+; CHECK: ret
+ %a = zext i16 %x to i64
+ ret i64 %a
+}
+
+define i64 @cvt_i64_i32(i32 %x) {
+; CHECK: ld.param.u32 %rl[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
+; CHECK: ret
+ %a = zext i32 %x to i64
+ ret i64 %a
+}
diff --git a/test/CodeGen/NVPTX/fma-disable.ll b/test/CodeGen/NVPTX/fma-disable.ll
new file mode 100644
index 000000000000..bdd74017f9d2
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma-disable.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
+
+define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
+entry:
+; FMA: fma.rn.f32
+; MUL: mul.rn.f32
+; MUL: add.rn.f32
+ %a = fmul float %x, %y
+ %b = fadd float %a, %z
+ ret float %b
+}
+
+define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
+entry:
+; FMA: fma.rn.f64
+; MUL: mul.rn.f64
+; MUL: add.rn.f64
+ %a = fmul double %x, %y
+ %b = fadd double %a, %z
+ ret double %b
+}
diff --git a/test/CodeGen/NVPTX/fma.ll b/test/CodeGen/NVPTX/fma.ll
new file mode 100644
index 000000000000..4ef1a9a4cefb
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y, float %z) {
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+ %a = fmul float %x, %y
+ %b = fadd float %a, %z
+ ret float %b
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z) {
+; CHECK: fma.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}};
+; CHECK: ret;
+ %a = fmul double %x, %y
+ %b = fadd double %a, %z
+ ret double %b
+}
diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/NVPTX/intrinsic-old.ll
index 9f37ead38d7f..1c9879c4178b 100644
--- a/test/CodeGen/PTX/intrinsic.ll
+++ b/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,231 +1,234 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
define ptx_device i32 @test_tid_x() {
-; CHECK: mov.u32 %ret0, %tid.x;
+; CHECK: mov.u32 %r0, %tid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.x()
ret i32 %x
}
define ptx_device i32 @test_tid_y() {
-; CHECK: mov.u32 %ret0, %tid.y;
+; CHECK: mov.u32 %r0, %tid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.y()
ret i32 %x
}
define ptx_device i32 @test_tid_z() {
-; CHECK: mov.u32 %ret0, %tid.z;
+; CHECK: mov.u32 %r0, %tid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.z()
ret i32 %x
}
define ptx_device i32 @test_tid_w() {
-; CHECK: mov.u32 %ret0, %tid.w;
+; CHECK: mov.u32 %r0, %tid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.w()
ret i32 %x
}
define ptx_device i32 @test_ntid_x() {
-; CHECK: mov.u32 %ret0, %ntid.x;
+; CHECK: mov.u32 %r0, %ntid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.x()
ret i32 %x
}
define ptx_device i32 @test_ntid_y() {
-; CHECK: mov.u32 %ret0, %ntid.y;
+; CHECK: mov.u32 %r0, %ntid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.y()
ret i32 %x
}
define ptx_device i32 @test_ntid_z() {
-; CHECK: mov.u32 %ret0, %ntid.z;
+; CHECK: mov.u32 %r0, %ntid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.z()
ret i32 %x
}
define ptx_device i32 @test_ntid_w() {
-; CHECK: mov.u32 %ret0, %ntid.w;
+; CHECK: mov.u32 %r0, %ntid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.w()
ret i32 %x
}
define ptx_device i32 @test_laneid() {
-; CHECK: mov.u32 %ret0, %laneid;
+; CHECK: mov.u32 %r0, %laneid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.laneid()
ret i32 %x
}
define ptx_device i32 @test_warpid() {
-; CHECK: mov.u32 %ret0, %warpid;
+; CHECK: mov.u32 %r0, %warpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.warpid()
ret i32 %x
}
define ptx_device i32 @test_nwarpid() {
-; CHECK: mov.u32 %ret0, %nwarpid;
+; CHECK: mov.u32 %r0, %nwarpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nwarpid()
ret i32 %x
}
define ptx_device i32 @test_ctaid_x() {
-; CHECK: mov.u32 %ret0, %ctaid.x;
+; CHECK: mov.u32 %r0, %ctaid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.x()
ret i32 %x
}
define ptx_device i32 @test_ctaid_y() {
-; CHECK: mov.u32 %ret0, %ctaid.y;
+; CHECK: mov.u32 %r0, %ctaid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.y()
ret i32 %x
}
define ptx_device i32 @test_ctaid_z() {
-; CHECK: mov.u32 %ret0, %ctaid.z;
+; CHECK: mov.u32 %r0, %ctaid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.z()
ret i32 %x
}
define ptx_device i32 @test_ctaid_w() {
-; CHECK: mov.u32 %ret0, %ctaid.w;
+; CHECK: mov.u32 %r0, %ctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.w()
ret i32 %x
}
define ptx_device i32 @test_nctaid_x() {
-; CHECK: mov.u32 %ret0, %nctaid.x;
+; CHECK: mov.u32 %r0, %nctaid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.x()
ret i32 %x
}
define ptx_device i32 @test_nctaid_y() {
-; CHECK: mov.u32 %ret0, %nctaid.y;
+; CHECK: mov.u32 %r0, %nctaid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.y()
ret i32 %x
}
define ptx_device i32 @test_nctaid_z() {
-; CHECK: mov.u32 %ret0, %nctaid.z;
+; CHECK: mov.u32 %r0, %nctaid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.z()
ret i32 %x
}
define ptx_device i32 @test_nctaid_w() {
-; CHECK: mov.u32 %ret0, %nctaid.w;
+; CHECK: mov.u32 %r0, %nctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.w()
ret i32 %x
}
define ptx_device i32 @test_smid() {
-; CHECK: mov.u32 %ret0, %smid;
+; CHECK: mov.u32 %r0, %smid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.smid()
ret i32 %x
}
define ptx_device i32 @test_nsmid() {
-; CHECK: mov.u32 %ret0, %nsmid;
+; CHECK: mov.u32 %r0, %nsmid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nsmid()
ret i32 %x
}
define ptx_device i32 @test_gridid() {
-; CHECK: mov.u32 %ret0, %gridid;
+; CHECK: mov.u32 %r0, %gridid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.gridid()
ret i32 %x
}
define ptx_device i32 @test_lanemask_eq() {
-; CHECK: mov.u32 %ret0, %lanemask_eq;
+; CHECK: mov.u32 %r0, %lanemask_eq;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.eq()
ret i32 %x
}
define ptx_device i32 @test_lanemask_le() {
-; CHECK: mov.u32 %ret0, %lanemask_le;
+; CHECK: mov.u32 %r0, %lanemask_le;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.le()
ret i32 %x
}
define ptx_device i32 @test_lanemask_lt() {
-; CHECK: mov.u32 %ret0, %lanemask_lt;
+; CHECK: mov.u32 %r0, %lanemask_lt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.lt()
ret i32 %x
}
define ptx_device i32 @test_lanemask_ge() {
-; CHECK: mov.u32 %ret0, %lanemask_ge;
+; CHECK: mov.u32 %r0, %lanemask_ge;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.ge()
ret i32 %x
}
define ptx_device i32 @test_lanemask_gt() {
-; CHECK: mov.u32 %ret0, %lanemask_gt;
+; CHECK: mov.u32 %r0, %lanemask_gt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.gt()
ret i32 %x
}
define ptx_device i32 @test_clock() {
-; CHECK: mov.u32 %ret0, %clock;
+; CHECK: mov.u32 %r0, %clock;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.clock()
ret i32 %x
}
define ptx_device i64 @test_clock64() {
-; CHECK: mov.u64 %ret0, %clock64;
+; CHECK: mov.u64 %rl0, %clock64;
; CHECK: ret;
%x = call i64 @llvm.ptx.read.clock64()
ret i64 %x
}
define ptx_device i32 @test_pm0() {
-; CHECK: mov.u32 %ret0, %pm0;
+; CHECK: mov.u32 %r0, %pm0;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm0()
ret i32 %x
}
define ptx_device i32 @test_pm1() {
-; CHECK: mov.u32 %ret0, %pm1;
+; CHECK: mov.u32 %r0, %pm1;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm1()
ret i32 %x
}
define ptx_device i32 @test_pm2() {
-; CHECK: mov.u32 %ret0, %pm2;
+; CHECK: mov.u32 %r0, %pm2;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm2()
ret i32 %x
}
define ptx_device i32 @test_pm3() {
-; CHECK: mov.u32 %ret0, %pm3;
+; CHECK: mov.u32 %r0, %pm3;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm3()
ret i32 %x
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
new file mode 100644
index 000000000000..afab60ca96a8
--- /dev/null
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+define ptx_device float @test_fabsf(float %f) {
+; CHECK: abs.f32 %f0, %f0;
+; CHECK: ret;
+ %x = call float @llvm.fabs.f32(float %f)
+ ret float %x
+}
+
+define ptx_device double @test_fabs(double %d) {
+; CHECK: abs.f64 %fl0, %fl0;
+; CHECK: ret;
+ %x = call double @llvm.fabs.f64(double %d)
+ ret double %x
+}
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
new file mode 100644
index 000000000000..d1f5093df223
--- /dev/null
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -0,0 +1,173 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
+; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i8 addrspace(1)* %ptr
+ ret i8 %a
+}
+
+define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i8 addrspace(3)* %ptr
+ ret i8 %a
+}
+
+define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
+; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i8 addrspace(5)* %ptr
+ ret i8 %a
+}
+
+;; i16
+define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
+; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i16 addrspace(1)* %ptr
+ ret i16 %a
+}
+
+define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i16 addrspace(3)* %ptr
+ ret i16 %a
+}
+
+define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
+; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i16 addrspace(5)* %ptr
+ ret i16 %a
+}
+
+;; i32
+define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
+; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i32 addrspace(1)* %ptr
+ ret i32 %a
+}
+
+define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i32 addrspace(3)* %ptr
+ ret i32 %a
+}
+
+define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
+; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i32 addrspace(5)* %ptr
+ ret i32 %a
+}
+
+;; i64
+define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
+; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i64 addrspace(1)* %ptr
+ ret i64 %a
+}
+
+define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
+; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i64 addrspace(3)* %ptr
+ ret i64 %a
+}
+
+define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
+; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i64 addrspace(5)* %ptr
+ ret i64 %a
+}
+
+;; f32
+define float @ld_global_f32(float addrspace(1)* %ptr) {
+; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load float addrspace(1)* %ptr
+ ret float %a
+}
+
+define float @ld_shared_f32(float addrspace(3)* %ptr) {
+; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load float addrspace(3)* %ptr
+ ret float %a
+}
+
+define float @ld_local_f32(float addrspace(5)* %ptr) {
+; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load float addrspace(5)* %ptr
+ ret float %a
+}
+
+;; f64
+define double @ld_global_f64(double addrspace(1)* %ptr) {
+; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load double addrspace(1)* %ptr
+ ret double %a
+}
+
+define double @ld_shared_f64(double addrspace(3)* %ptr) {
+; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load double addrspace(3)* %ptr
+ ret double %a
+}
+
+define double @ld_local_f64(double addrspace(5)* %ptr) {
+; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load double addrspace(5)* %ptr
+ ret double %a
+}
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
new file mode 100644
index 000000000000..81a5216f963a
--- /dev/null
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
+; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i8 addrspace(0)* %ptr
+ ret i8 %a
+}
+
+;; i16
+define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
+; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i16 addrspace(0)* %ptr
+ ret i16 %a
+}
+
+;; i32
+define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
+; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i32 addrspace(0)* %ptr
+ ret i32 %a
+}
+
+;; i64
+define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
+; PTX32: ld.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load i64 addrspace(0)* %ptr
+ ret i64 %a
+}
+
+;; f32
+define float @ld_global_f32(float addrspace(0)* %ptr) {
+; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load float addrspace(0)* %ptr
+ ret float %a
+}
+
+;; f64
+define double @ld_global_f64(double addrspace(0)* %ptr) {
+; PTX32: ld.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ret
+; PTX64: ld.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ret
+ %a = load double addrspace(0)* %ptr
+ ret double %a
+}
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/NVPTX/lit.local.cfg
index e748f7f05b31..7180c841d6e8 100644
--- a/test/CodeGen/PTX/lit.local.cfg
+++ b/test/CodeGen/NVPTX/lit.local.cfg
@@ -1,6 +1,5 @@
config.suffixes = ['.ll', '.c', '.cpp']
targets = set(config.root.targets_to_build.split())
-if not 'PTX' in targets:
+if not 'NVPTX' in targets:
config.unsupported = True
-
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
new file mode 100644
index 000000000000..ab6f423cd80a
--- /dev/null
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+
+
+; CHECK: .func ({{.*}}) device_func
+define float @device_func(float %a) noinline {
+ %ret = fmul float %a, %a
+ ret float %ret
+}
+
+; CHECK: .entry kernel_func
+define void @kernel_func(float* %a) {
+ %val = load float* %a
+; CHECK: call.uni (retval0),
+; CHECK: device_func,
+ %mul = call float @device_func(float %val)
+ store float %mul, float* %a
+ ret void
+}
+
+
+
+!nvvm.annotations = !{!1}
+
+!1 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll
new file mode 100644
index 000000000000..54e04ae6106d
--- /dev/null
+++ b/test/CodeGen/NVPTX/st-addrspace.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+
+define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
+; PTX32: st.global.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+ store i8 %a, i8 addrspace(1)* %ptr
+ ret void
+}
+
+define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
+; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+ store i8 %a, i8 addrspace(3)* %ptr
+ ret void
+}
+
+define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
+; PTX32: st.local.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+ store i8 %a, i8 addrspace(5)* %ptr
+ ret void
+}
+
+;; i16
+
+define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
+; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+ store i16 %a, i16 addrspace(1)* %ptr
+ ret void
+}
+
+define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
+; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+ store i16 %a, i16 addrspace(3)* %ptr
+ ret void
+}
+
+define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
+; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+ store i16 %a, i16 addrspace(5)* %ptr
+ ret void
+}
+
+;; i32
+
+define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
+; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+ store i32 %a, i32 addrspace(1)* %ptr
+ ret void
+}
+
+define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
+; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+ store i32 %a, i32 addrspace(3)* %ptr
+ ret void
+}
+
+define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
+; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+ store i32 %a, i32 addrspace(5)* %ptr
+ ret void
+}
+
+;; i64
+
+define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
+; PTX32: st.global.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+ store i64 %a, i64 addrspace(1)* %ptr
+ ret void
+}
+
+define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
+; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+ store i64 %a, i64 addrspace(3)* %ptr
+ ret void
+}
+
+define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
+; PTX32: st.local.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+ store i64 %a, i64 addrspace(5)* %ptr
+ ret void
+}
+
+;; f32
+
+define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
+; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+ store float %a, float addrspace(1)* %ptr
+ ret void
+}
+
+define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
+; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+ store float %a, float addrspace(3)* %ptr
+ ret void
+}
+
+define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
+; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+ store float %a, float addrspace(5)* %ptr
+ ret void
+}
+
+;; f64
+
+define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
+; PTX32: st.global.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.global.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+ store double %a, double addrspace(1)* %ptr
+ ret void
+}
+
+define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
+; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.shared.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+ store double %a, double addrspace(3)* %ptr
+ ret void
+}
+
+define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
+; PTX32: st.local.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.local.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+ store double %a, double addrspace(5)* %ptr
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/st-generic.ll b/test/CodeGen/NVPTX/st-generic.ll
new file mode 100644
index 000000000000..59a1fe021119
--- /dev/null
+++ b/test/CodeGen/NVPTX/st-generic.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+
+;; i8
+
+define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
+; PTX32: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: ret
+ store i8 %a, i8 addrspace(0)* %ptr
+ ret void
+}
+
+;; i16
+
+define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
+; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
+; PTX64: ret
+ store i16 %a, i16 addrspace(0)* %ptr
+ ret void
+}
+
+;; i32
+
+define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
+; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
+; PTX64: ret
+ store i32 %a, i32 addrspace(0)* %ptr
+ ret void
+}
+
+;; i64
+
+define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
+; PTX32: st.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
+; PTX64: ret
+ store i64 %a, i64 addrspace(0)* %ptr
+ ret void
+}
+
+;; f32
+
+define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
+; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
+; PTX64: ret
+ store float %a, float addrspace(0)* %ptr
+ ret void
+}
+
+;; f64
+
+define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
+; PTX32: st.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX32: ret
+; PTX64: st.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
+; PTX64: ret
+ store double %a, double addrspace(0)* %ptr
+ ret void
+}
diff --git a/test/CodeGen/PTX/20110926-sitofp.ll b/test/CodeGen/PTX/20110926-sitofp.ll
deleted file mode 100644
index 38d35c5ff44d..000000000000
--- a/test/CodeGen/PTX/20110926-sitofp.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-@A = common global [1536 x [1536 x float]] zeroinitializer, align 4
-@B = common global [1536 x [1536 x float]] zeroinitializer, align 4
-
-define internal ptx_device void @init_array(i32 %x, i32 %y) {
- %arrayidx103 = getelementptr [1536 x [1536 x float]]* @A, i32 0, i32 %x, i32 %y
- %arrayidx224 = getelementptr [1536 x [1536 x float]]* @B, i32 0, i32 %x, i32 %y
- %mul5 = mul i32 %x, %y
- %rem = srem i32 %mul5, 1024
- %add = add nsw i32 %rem, 1
-; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}}
- %conv = sitofp i32 %add to double
- %div = fmul double %conv, 5.000000e-01
- %conv7 = fptrunc double %div to float
- store float %conv7, float* %arrayidx103, align 4
- %rem14 = srem i32 %mul5, 1024
- %add15 = add nsw i32 %rem14, 1
- %conv16 = sitofp i32 %add15 to double
- %div17 = fmul double %conv16, 5.000000e-01
- %conv18 = fptrunc double %div17 to float
- store float %conv18, float* %arrayidx224, align 4
- ret void
-}
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
deleted file mode 100644
index 8b10d11cba75..000000000000
--- a/test/CodeGen/PTX/add.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
-; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
- %z = add i16 %x, %y
- ret i16 %z
-}
-
-define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
-; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
- %z = add i32 %x, %y
- ret i32 %z
-}
-
-define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
-; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
- %z = add i64 %x, %y
- ret i64 %z
-}
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret;
- %z = fadd float %x, %y
- ret float %z
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
-; CHECK: ret;
- %z = fadd double %x, %y
- ret double %z
-}
-
-define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, 1;
-; CHECK: ret;
- %z = add i16 %x, 1
- ret i16 %z
-}
-
-define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 1;
-; CHECK: ret;
- %z = add i32 %x, 1
- ret i32 %z
-}
-
-define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, 1;
-; CHECK: ret;
- %z = add i64 %x, 1
- ret i64 %z
-}
-
-define ptx_device float @t2_f32(float %x) {
-; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D3FF0000000000000;
-; CHECK: ret;
- %z = fadd float %x, 1.0
- ret float %z
-}
-
-define ptx_device double @t2_f64(double %x) {
-; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D3FF0000000000000;
-; CHECK: ret;
- %z = fadd double %x, 1.0
- ret double %z
-}
diff --git a/test/CodeGen/PTX/aggregates.ll b/test/CodeGen/PTX/aggregates.ll
deleted file mode 100644
index 3fc0c408810b..000000000000
--- a/test/CodeGen/PTX/aggregates.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
-; XFAIL: *
-
-%complex = type { float, float }
-
-define ptx_device %complex @complex_add(%complex %a, %complex %b) {
-entry:
-; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1];
-; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3];
-; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2];
-; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4];
-; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]];
-; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]];
-; CHECK-NEXT: ret;
- %a.real = extractvalue %complex %a, 0
- %a.imag = extractvalue %complex %a, 1
- %b.real = extractvalue %complex %b, 0
- %b.imag = extractvalue %complex %b, 1
- %ret.real = fadd float %a.real, %b.real
- %ret.imag = fadd float %a.imag, %b.imag
- %ret.0 = insertvalue %complex undef, float %ret.real, 0
- %ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1
- ret %complex %ret.1
-}
diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll
deleted file mode 100644
index 1403a23d1dcd..000000000000
--- a/test/CodeGen/PTX/bitwise.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-; preds
-
-define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
-; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
- %c = and i1 %x, %y
- %d = zext i1 %c to i32
- ret i32 %d
-}
-
-define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
-; CHECK: or.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
- %a = or i1 %x, %y
- %b = zext i1 %a to i32
- ret i32 %b
-}
-
-define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
-; CHECK: xor.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
- %a = xor i1 %x, %y
- %b = zext i1 %a to i32
- ret i32 %b
-}
diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll
deleted file mode 100644
index 464c29cca884..000000000000
--- a/test/CodeGen/PTX/bra.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device void @test_bra_direct() {
-; CHECK: bra $L__BB0_1;
-entry:
- br label %loop
-loop:
- br label %loop
-}
-
-define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
-entry:
-; CHECK: setp.le.u32 %p0, %r[[R0:[0-9]+]], %r[[R1:[0-9]+]]
- %p = icmp ugt i32 %x, %y
-; CHECK-NEXT: @%p0 bra
-; CHECK-NOT: bra
- br i1 %p, label %clause.if, label %clause.else
-clause.if:
-; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R0]]
- ret i32 %x
-clause.else:
-; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R1]]
- ret i32 %y
-}
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
deleted file mode 100644
index f55070af2223..000000000000
--- a/test/CodeGen/PTX/cvt.ll
+++ /dev/null
@@ -1,290 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-; preds
-; (note: we convert back to i32 to return)
-
-define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rh{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
- %a = trunc i16 %x to i1
- %b = and i1 %a, %y
- %c = zext i1 %b to i32
- ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
- %a = trunc i32 %x to i1
- %b = and i1 %a, %y
- %c = zext i1 %b to i32
- ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
-; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
- %a = trunc i64 %x to i1
- %b = and i1 %a, %y
- %c = zext i1 %b to i32
- ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
- %a = fptoui float %x to i1
- %b = and i1 %a, %y
- %c = zext i1 %b to i32
- ret i32 %c
-}
-
-define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
-; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
-; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
-; CHECK: ret;
- %a = fptoui double %x to i1
- %b = and i1 %a, %y
- %c = zext i1 %b to i32
- ret i32 %c
-}
-
-; i16
-
-define ptx_device i16 @cvt_i16_preds(i1 %x) {
-; CHECK: selp.u16 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
-; CHECK: ret;
- %a = zext i1 %x to i16
- ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_i32(i32 %x) {
-; CHECK: cvt.u16.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
- %a = trunc i32 %x to i16
- ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_i64(i64 %x) {
-; CHECK: cvt.u16.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
- %a = trunc i64 %x to i16
- ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_f32(float %x) {
-; CHECK: cvt.rzi.u16.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fptoui float %x to i16
- ret i16 %a
-}
-
-define ptx_device i16 @cvt_i16_f64(double %x) {
-; CHECK: cvt.rzi.u16.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fptoui double %x to i16
- ret i16 %a
-}
-
-; i32
-
-define ptx_device i32 @cvt_i32_preds(i1 %x) {
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
-; CHECK: ret;
- %a = zext i1 %x to i32
- ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_i16(i16 %x) {
-; CHECK: cvt.u32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
- %a = zext i16 %x to i32
- ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_i64(i64 %x) {
-; CHECK: cvt.u32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
- %a = trunc i64 %x to i32
- ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_f32(float %x) {
-; CHECK: cvt.rzi.u32.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fptoui float %x to i32
- ret i32 %a
-}
-
-define ptx_device i32 @cvt_i32_f64(double %x) {
-; CHECK: cvt.rzi.u32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fptoui double %x to i32
- ret i32 %a
-}
-
-; i64
-
-define ptx_device i64 @cvt_i64_preds(i1 %x) {
-; CHECK: selp.u64 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
-; CHECK: ret;
- %a = zext i1 %x to i64
- ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_i16(i16 %x) {
-; CHECK: cvt.u64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
- %a = zext i16 %x to i64
- ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_i32(i32 %x) {
-; CHECK: cvt.u64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
- %a = zext i32 %x to i64
- ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_f32(float %x) {
-; CHECK: cvt.rzi.u64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fptoui float %x to i64
- ret i64 %a
-}
-
-define ptx_device i64 @cvt_i64_f64(double %x) {
-; CHECK: cvt.rzi.u64.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fptoui double %x to i64
- ret i64 %a
-}
-
-; f32
-
-define ptx_device float @cvt_f32_preds(i1 %x) {
-; CHECK: mov.b32 %f0, 0;
-; CHECK: mov.b32 %f1, 1065353216;
-; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i1 %x to float
- ret float %a
-}
-
-define ptx_device float @cvt_f32_i16(i16 %x) {
-; CHECK: cvt.rn.f32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i16 %x to float
- ret float %a
-}
-
-define ptx_device float @cvt_f32_i32(i32 %x) {
-; CHECK: cvt.rn.f32.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i32 %x to float
- ret float %a
-}
-
-define ptx_device float @cvt_f32_i64(i64 %x) {
-; CHECK: cvt.rn.f32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i64 %x to float
- ret float %a
-}
-
-define ptx_device float @cvt_f32_f64(double %x) {
-; CHECK: cvt.rn.f32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fptrunc double %x to float
- ret float %a
-}
-
-define ptx_device float @cvt_f32_s16(i16 %x) {
-; CHECK: cvt.rn.f32.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
-; CHECK: ret
- %a = sitofp i16 %x to float
- ret float %a
-}
-
-define ptx_device float @cvt_f32_s32(i32 %x) {
-; CHECK: cvt.rn.f32.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
- %a = sitofp i32 %x to float
- ret float %a
-}
-
-define ptx_device float @cvt_f32_s64(i64 %x) {
-; CHECK: cvt.rn.f32.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: ret
- %a = sitofp i64 %x to float
- ret float %a
-}
-
-; f64
-
-define ptx_device double @cvt_f64_preds(i1 %x) {
-; CHECK: mov.b64 %fd0, 0;
-; CHECK: mov.b64 %fd1, 4575657221408423936;
-; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i1 %x to double
- ret double %a
-}
-
-define ptx_device double @cvt_f64_i16(i16 %x) {
-; CHECK: cvt.rn.f64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i16 %x to double
- ret double %a
-}
-
-define ptx_device double @cvt_f64_i32(i32 %x) {
-; CHECK: cvt.rn.f64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i32 %x to double
- ret double %a
-}
-
-define ptx_device double @cvt_f64_i64(i64 %x) {
-; CHECK: cvt.rn.f64.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
- %a = uitofp i64 %x to double
- ret double %a
-}
-
-define ptx_device double @cvt_f64_f32(float %x) {
-; CHECK: cvt.f64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fpext float %x to double
- ret double %a
-}
-
-define ptx_device double @cvt_f64_s16(i16 %x) {
-; CHECK: cvt.rn.f64.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
-; CHECK: ret
- %a = sitofp i16 %x to double
- ret double %a
-}
-
-define ptx_device double @cvt_f64_s32(i32 %x) {
-; CHECK: cvt.rn.f64.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
- %a = sitofp i32 %x to double
- ret double %a
-}
-
-define ptx_device double @cvt_f64_s64(i64 %x) {
-; CHECK: cvt.rn.f64.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: ret
- %a = sitofp i64 %x to double
- ret double %a
-}
diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll
deleted file mode 100644
index 7816c801728f..000000000000
--- a/test/CodeGen/PTX/exit.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_kernel void @t1() {
-; CHECK: exit;
-; CHECK-NOT: ret;
- ret void
-}
-
-define ptx_kernel void @t2(i32* %p, i32 %x) {
- store i32 %x, i32* %p
-; CHECK: exit;
-; CHECK-NOT: ret;
- ret void
-}
diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll
deleted file mode 100644
index e1013befa268..000000000000
--- a/test/CodeGen/PTX/fdiv-sm10.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fdiv float %x, %y
- ret float %a
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: div.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fdiv double %x, %y
- ret double %a
-}
diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll
deleted file mode 100644
index 1afa2ebd08e9..000000000000
--- a/test/CodeGen/PTX/fdiv-sm13.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fdiv float %x, %y
- ret float %a
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: div.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fdiv double %x, %y
- ret double %a
-}
diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll
deleted file mode 100644
index 2b76e638f682..000000000000
--- a/test/CodeGen/PTX/fneg.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x) {
-; CHECK: neg.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %y = fsub float -0.000000e+00, %x
- ret float %y
-}
-
-define ptx_device double @t1_f64(double %x) {
-; CHECK: neg.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %y = fsub double -0.000000e+00, %x
- ret double %y
-}
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
deleted file mode 100644
index e55820dfb0ea..000000000000
--- a/test/CodeGen/PTX/ld.ll
+++ /dev/null
@@ -1,382 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-;CHECK: .extern .global .b16 array_i16[10];
-@array_i16 = external global [10 x i16]
-
-;CHECK: .extern .const .b16 array_constant_i16[10];
-@array_constant_i16 = external addrspace(1) constant [10 x i16]
-
-;CHECK: .extern .shared .b16 array_shared_i16[10];
-@array_shared_i16 = external addrspace(4) global [10 x i16]
-
-;CHECK: .extern .global .b32 array_i32[10];
-@array_i32 = external global [10 x i32]
-
-;CHECK: .extern .const .b32 array_constant_i32[10];
-@array_constant_i32 = external addrspace(1) constant [10 x i32]
-
-;CHECK: .extern .shared .b32 array_shared_i32[10];
-@array_shared_i32 = external addrspace(4) global [10 x i32]
-
-;CHECK: .extern .global .b64 array_i64[10];
-@array_i64 = external global [10 x i64]
-
-;CHECK: .extern .const .b64 array_constant_i64[10];
-@array_constant_i64 = external addrspace(1) constant [10 x i64]
-
-;CHECK: .extern .shared .b64 array_shared_i64[10];
-@array_shared_i64 = external addrspace(4) global [10 x i64]
-
-;CHECK: .extern .global .b32 array_float[10];
-@array_float = external global [10 x float]
-
-;CHECK: .extern .const .b32 array_constant_float[10];
-@array_constant_float = external addrspace(1) constant [10 x float]
-
-;CHECK: .extern .shared .b32 array_shared_float[10];
-@array_shared_float = external addrspace(4) global [10 x float]
-
-;CHECK: .extern .global .b64 array_double[10];
-@array_double = external global [10 x double]
-
-;CHECK: .extern .const .b64 array_constant_double[10];
-@array_constant_double = external addrspace(1) constant [10 x double]
-
-;CHECK: .extern .shared .b64 array_shared_double[10];
-@array_shared_double = external addrspace(4) global [10 x double]
-
-
-define ptx_device i16 @t1_u16(i16* %p) {
-entry:
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
- %x = load i16* %p
- ret i16 %x
-}
-
-define ptx_device i32 @t1_u32(i32* %p) {
-entry:
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
- %x = load i32* %p
- ret i32 %x
-}
-
-define ptx_device i64 @t1_u64(i64* %p) {
-entry:
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
- %x = load i64* %p
- ret i64 %x
-}
-
-define ptx_device float @t1_f32(float* %p) {
-entry:
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
- %x = load float* %p
- ret float %x
-}
-
-define ptx_device double @t1_f64(double* %p) {
-entry:
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
-;CHECK: ret;
- %x = load double* %p
- ret double %x
-}
-
-define ptx_device i16 @t2_u16(i16* %p) {
-entry:
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2];
-;CHECK: ret;
- %i = getelementptr i16* %p, i32 1
- %x = load i16* %i
- ret i16 %x
-}
-
-define ptx_device i32 @t2_u32(i32* %p) {
-entry:
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
-;CHECK: ret;
- %i = getelementptr i32* %p, i32 1
- %x = load i32* %i
- ret i32 %x
-}
-
-define ptx_device i64 @t2_u64(i64* %p) {
-entry:
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
-;CHECK: ret;
- %i = getelementptr i64* %p, i32 1
- %x = load i64* %i
- ret i64 %x
-}
-
-define ptx_device float @t2_f32(float* %p) {
-entry:
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
-;CHECK: ret;
- %i = getelementptr float* %p, i32 1
- %x = load float* %i
- ret float %x
-}
-
-define ptx_device double @t2_f64(double* %p) {
-entry:
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
-;CHECK: ret;
- %i = getelementptr double* %p, i32 1
- %x = load double* %i
- ret double %x
-}
-
-define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
- %i = getelementptr i16* %p, i32 %q
- %x = load i16* %i
- ret i16 %x
-}
-
-define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
- %i = getelementptr i32* %p, i32 %q
- %x = load i32* %i
- ret i32 %x
-}
-
-define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
- %i = getelementptr i64* %p, i32 %q
- %x = load i64* %i
- ret i64 %x
-}
-
-define ptx_device float @t3_f32(float* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
- %i = getelementptr float* %p, i32 %q
- %x = load float* %i
- ret float %x
-}
-
-define ptx_device double @t3_f64(double* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
- %i = getelementptr double* %p, i32 %q
- %x = load double* %i
- ret double %x
-}
-
-define ptx_device i16 @t4_global_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
- %x = load i16* %i
- ret i16 %x
-}
-
-define ptx_device i32 @t4_global_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
- %x = load i32* %i
- ret i32 %x
-}
-
-define ptx_device i64 @t4_global_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
- %x = load i64* %i
- ret i64 %x
-}
-
-define ptx_device float @t4_global_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
- %x = load float* %i
- ret float %x
-}
-
-define ptx_device double @t4_global_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
- %x = load double* %i
- ret double %x
-}
-
-define ptx_device i16 @t4_const_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16;
-;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
- %x = load i16 addrspace(1)* %i
- ret i16 %x
-}
-
-define ptx_device i32 @t4_const_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32;
-;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
- %x = load i32 addrspace(1)* %i
- ret i32 %x
-}
-
-define ptx_device i64 @t4_const_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64;
-;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
- %x = load i64 addrspace(1)* %i
- ret i64 %x
-}
-
-define ptx_device float @t4_const_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float;
-;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
- %x = load float addrspace(1)* %i
- ret float %x
-}
-
-define ptx_device double @t4_const_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double;
-;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
- %x = load double addrspace(1)* %i
- ret double %x
-}
-
-define ptx_device i16 @t4_shared_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
-;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
- %x = load i16 addrspace(4)* %i
- ret i16 %x
-}
-
-define ptx_device i32 @t4_shared_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
-;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
- %x = load i32 addrspace(4)* %i
- ret i32 %x
-}
-
-define ptx_device i64 @t4_shared_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
-;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
- %x = load i64 addrspace(4)* %i
- ret i64 %x
-}
-
-define ptx_device float @t4_shared_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
-;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
- %x = load float addrspace(4)* %i
- ret float %x
-}
-
-define ptx_device double @t4_shared_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
-;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]];
-;CHECK: ret;
- %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
- %x = load double addrspace(4)* %i
- ret double %x
-}
-
-define ptx_device i16 @t5_u16() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2];
-;CHECK: ret;
- %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
- %x = load i16* %i
- ret i16 %x
-}
-
-define ptx_device i32 @t5_u32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4];
-;CHECK: ret;
- %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
- %x = load i32* %i
- ret i32 %x
-}
-
-define ptx_device i64 @t5_u64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8];
-;CHECK: ret;
- %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
- %x = load i64* %i
- ret i64 %x
-}
-
-define ptx_device float @t5_f32() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4];
-;CHECK: ret;
- %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
- %x = load float* %i
- ret float %x
-}
-
-define ptx_device double @t5_f64() {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8];
-;CHECK: ret;
- %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
- %x = load double* %i
- ret double %x
-}
diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll
deleted file mode 100644
index e73ad256a1c4..000000000000
--- a/test/CodeGen/PTX/llvm-intrinsic.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
-
-define ptx_device float @test_sqrt_f32(float %x) {
-entry:
-; CHECK: sqrt.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %y = call float @llvm.sqrt.f32(float %x)
- ret float %y
-}
-
-define ptx_device double @test_sqrt_f64(double %x) {
-entry:
-; CHECK: sqrt.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %y = call double @llvm.sqrt.f64(double %x)
- ret double %y
-}
-
-define ptx_device float @test_sin_f32(float %x) {
-entry:
-; CHECK: sin.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %y = call float @llvm.sin.f32(float %x)
- ret float %y
-}
-
-define ptx_device double @test_sin_f64(double %x) {
-entry:
-; CHECK: sin.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %y = call double @llvm.sin.f64(double %x)
- ret double %y
-}
-
-define ptx_device float @test_cos_f32(float %x) {
-entry:
-; CHECK: cos.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %y = call float @llvm.cos.f32(float %x)
- ret float %y
-}
-
-define ptx_device double @test_cos_f64(double %x) {
-entry:
-; CHECK: cos.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %y = call double @llvm.cos.f64(double %x)
- ret double %y
-}
-
-declare float @llvm.sqrt.f32(float)
-declare double @llvm.sqrt.f64(double)
-declare float @llvm.sin.f32(float)
-declare double @llvm.sin.f64(double)
-declare float @llvm.cos.f32(float)
-declare double @llvm.cos.f64(double)
diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll
deleted file mode 100644
index 603c3ba69f79..000000000000
--- a/test/CodeGen/PTX/mad-disabling.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
-; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
-; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
-
-define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
-entry:
-; FMA: mad.rn.f32
-; MUL: mul.rn.f32
-; MUL: add.rn.f32
- %a = fmul float %x, %y
- %b = fadd float %a, %z
- ret float %b
-}
-
-define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
-entry:
-; FMA: mad.rn.f64
-; MUL: mul.rn.f64
-; MUL: add.rn.f64
- %a = fmul double %x, %y
- %b = fadd double %a, %z
- ret double %b
-}
diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll
deleted file mode 100644
index cc28e3fa1cd3..000000000000
--- a/test/CodeGen/PTX/mad.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
-
-define ptx_device float @t1_f32(float %x, float %y, float %z) {
-; CHECK: mad.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
-; CHECK: ret;
- %a = fmul float %x, %y
- %b = fadd float %a, %z
- ret float %b
-}
-
-define ptx_device double @t1_f64(double %x, double %y, double %z) {
-; CHECK: mad.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
-; CHECK: ret;
- %a = fmul double %x, %y
- %b = fadd double %a, %z
- ret double %b
-}
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
deleted file mode 100644
index 9e501be03eeb..000000000000
--- a/test/CodeGen/PTX/mov.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i16 @t1_u16() {
-; CHECK: mov.u16 %ret{{[0-9]+}}, 0;
-; CHECK: ret;
- ret i16 0
-}
-
-define ptx_device i32 @t1_u32() {
-; CHECK: mov.u32 %ret{{[0-9]+}}, 0;
-; CHECK: ret;
- ret i32 0
-}
-
-define ptx_device i64 @t1_u64() {
-; CHECK: mov.u64 %ret{{[0-9]+}}, 0;
-; CHECK: ret;
- ret i64 0
-}
-
-define ptx_device float @t1_f32() {
-; CHECK: mov.f32 %ret{{[0-9]+}}, 0D0000000000000000;
-; CHECK: ret;
- ret float 0.0
-}
-
-define ptx_device double @t1_f64() {
-; CHECK: mov.f64 %ret{{[0-9]+}}, 0D0000000000000000;
-; CHECK: ret;
- ret double 0.0
-}
-
-define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
- ret i16 %x
-}
-
-define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
- ret i32 %x
-}
-
-define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
- ret i64 %x
-}
-
-define ptx_device float @t3_f32(float %x) {
-; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
- ret float %x
-}
-
-define ptx_device double @t3_f64(double %x) {
-; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
-; CHECK: ret;
- ret double %x
-}
-
diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll
deleted file mode 100644
index 91949db73c39..000000000000
--- a/test/CodeGen/PTX/mul.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-;define ptx_device i32 @t1(i32 %x, i32 %y) {
-; %z = mul i32 %x, %y
-; ret i32 %z
-;}
-
-;define ptx_device i32 @t2(i32 %x) {
-; %z = mul i32 %x, 1
-; ret i32 %z
-;}
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret;
- %z = fmul float %x, %y
- ret float %z
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
-; CHECK: ret;
- %z = fmul double %x, %y
- ret double %z
-}
-
-define ptx_device float @t2_f32(float %x) {
-; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D4014000000000000;
-; CHECK: ret;
- %z = fmul float %x, 5.0
- ret float %z
-}
-
-define ptx_device double @t2_f64(double %x) {
-; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D4014000000000000;
-; CHECK: ret;
- %z = fmul double %x, 5.0
- ret double %z
-}
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
deleted file mode 100644
index 0fb6602fe87c..000000000000
--- a/test/CodeGen/PTX/options.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0"
-; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1"
-; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2"
-; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".version 2.3"
-; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
-; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
-; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".address_size 32"
-; RUN: llc < %s -march=ptx64 -mattr=ptx23 | grep ".address_size 64"
-
-define ptx_device void @t1() {
- ret void
-}
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
deleted file mode 100644
index 377f17379fe1..000000000000
--- a/test/CodeGen/PTX/parameter-order.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
-define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
-; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
- %result = sub i32 %b, %c
- ret i32 %result
-}
diff --git a/test/CodeGen/PTX/printf.ll b/test/CodeGen/PTX/printf.ll
deleted file mode 100644
index f901b2055f0d..000000000000
--- a/test/CodeGen/PTX/printf.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s
-
-declare i32 @printf(i8*, ...)
-
-@str = private unnamed_addr constant [6 x i8] c"test\0A\00"
-
-define ptx_device void @t1_printf() {
-; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str;
-; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
-; CHECK: ret;
- %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0))
- ret void
-}
-
-@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00"
-
-define ptx_device void @t2_printf() {
-; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}];
-; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2;
-; CHECK: cvta.local.u64 %rd{{[0-9]+}}, __local{{[0-9+]}};
-; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
-; CHECK: ret;
- %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000)
- ret void
-}
diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll
deleted file mode 100644
index ba0523f6424a..000000000000
--- a/test/CodeGen/PTX/ret.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device void @t1() {
-; CHECK: ret;
-; CHECK-NOT: exit;
- ret void
-}
diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll
deleted file mode 100644
index aa7ce850b176..000000000000
--- a/test/CodeGen/PTX/selp.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
-; CHECK: selp.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}};
- %a = select i1 %x, i32 %y, i32 %z
- ret i32 %a
-}
-
-define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
-; CHECK: selp.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}, %p{{[0-9]+}};
- %a = select i1 %x, i64 %y, i64 %z
- ret i64 %a
-}
-
-define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
-; CHECK: selp.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %p{{[0-9]+}};
- %a = select i1 %x, float %y, float %z
- ret float %a
-}
-
-define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
-; CHECK: selp.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %p{{[0-9]+}};
- %a = select i1 %x, double %y, double %z
- ret double %a
-}
diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll
deleted file mode 100644
index 646abab19af2..000000000000
--- a/test/CodeGen/PTX/setp.ll
+++ /dev/null
@@ -1,206 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp eq i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ne i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ult i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ule i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ugt i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp uge i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp slt i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp sle i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp sgt i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp sge i32 %x, %y
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
-; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp eq i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
-; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ne i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
-; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ult i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
-; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ule i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp ugt i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
-; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp uge i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp slt i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp sle i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp sgt i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
-; CHECK: ret;
- %p = icmp sge i32 %x, 1
- %z = zext i1 %p to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, %p[[P0]];
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1;
-; CHECK: ret;
- %c = icmp eq i32 %x, %y
- %d = icmp ugt i32 %u, %v
- %e = and i1 %c, %d
- %z = zext i1 %e to i32
- ret i32 %z
-}
-
-define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
-; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, !%p[[P0]];
-; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1;
-; CHECK: ret;
- %c = trunc i32 %w to i1
- %d = icmp eq i32 %x, %y
- %e = xor i1 %c, 1
- %f = and i1 %d, %e
- %z = zext i1 %f to i32
- ret i32 %z
-}
diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll
deleted file mode 100644
index d9fe2cdb5413..000000000000
--- a/test/CodeGen/PTX/shl.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
- %z = shl i32 %x, %y
-; CHECK: ret;
- ret i32 %z
-}
-
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
- %z = shl i32 %x, 3
-; CHECK: ret;
- ret i32 %z
-}
-
-define ptx_device i32 @t3(i32 %x) {
-; CHECK: shl.b32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
- %z = shl i32 3, %x
-; CHECK: ret;
- ret i32 %z
-}
diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll
deleted file mode 100644
index eb4666fbee65..000000000000
--- a/test/CodeGen/PTX/shr.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
- %z = lshr i32 %x, %y
-; CHECK: ret;
- ret i32 %z
-}
-
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
- %z = lshr i32 %x, 3
-; CHECK: ret;
- ret i32 %z
-}
-
-define ptx_device i32 @t3(i32 %x) {
-; CHECK: shr.u32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
- %z = lshr i32 3, %x
-; CHECK: ret;
- ret i32 %z
-}
-
-define ptx_device i32 @t4(i32 %x, i32 %y) {
-; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
- %z = ashr i32 %x, %y
-; CHECK: ret;
- ret i32 %z
-}
-
-define ptx_device i32 @t5(i32 %x) {
-; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
- %z = ashr i32 %x, 3
-; CHECK: ret;
- ret i32 %z
-}
-
-define ptx_device i32 @t6(i32 %x) {
-; CHECK: shr.s32 %ret{{[0-9]+}}, -3, %r{{[0-9]+}}
- %z = ashr i32 -3, %x
-; CHECK: ret;
- ret i32 %z
-}
diff --git a/test/CodeGen/PTX/simple-call.ll b/test/CodeGen/PTX/simple-call.ll
deleted file mode 100644
index 77ea29eae8bd..000000000000
--- a/test/CodeGen/PTX/simple-call.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
-
-define ptx_device void @test_add(float %x, float %y) {
-; CHECK: ret;
- %z = fadd float %x, %y
- ret void
-}
-
-define ptx_device float @test_call(float %x, float %y) {
- %a = fadd float %x, %y
-; CHECK: call.uni test_add, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
- call void @test_add(float %a, float %y)
- ret float %a
-}
-
-define ptx_device float @test_compute(float %x, float %y) {
-; CHECK: ret;
- %z = fadd float %x, %y
- ret float %z
-}
-
-define ptx_device float @test_call_compute(float %x, float %y) {
-; CHECK: call.uni (__localparam_{{[0-9]+}}), test_compute, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}})
- %z = call float @test_compute(float %x, float %y)
- ret float %z
-}
-
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
deleted file mode 100644
index c7943630c435..000000000000
--- a/test/CodeGen/PTX/st.ll
+++ /dev/null
@@ -1,337 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-;CHECK: .extern .global .b16 array_i16[10];
-@array_i16 = external global [10 x i16]
-
-;CHECK: .extern .const .b16 array_constant_i16[10];
-@array_constant_i16 = external addrspace(1) constant [10 x i16]
-
-;CHECK: .extern .shared .b16 array_shared_i16[10];
-@array_shared_i16 = external addrspace(4) global [10 x i16]
-
-;CHECK: .extern .global .b32 array_i32[10];
-@array_i32 = external global [10 x i32]
-
-;CHECK: .extern .const .b32 array_constant_i32[10];
-@array_constant_i32 = external addrspace(1) constant [10 x i32]
-
-;CHECK: .extern .shared .b32 array_shared_i32[10];
-@array_shared_i32 = external addrspace(4) global [10 x i32]
-
-;CHECK: .extern .global .b64 array_i64[10];
-@array_i64 = external global [10 x i64]
-
-;CHECK: .extern .const .b64 array_constant_i64[10];
-@array_constant_i64 = external addrspace(1) constant [10 x i64]
-
-;CHECK: .extern .shared .b64 array_shared_i64[10];
-@array_shared_i64 = external addrspace(4) global [10 x i64]
-
-;CHECK: .extern .global .b32 array_float[10];
-@array_float = external global [10 x float]
-
-;CHECK: .extern .const .b32 array_constant_float[10];
-@array_constant_float = external addrspace(1) constant [10 x float]
-
-;CHECK: .extern .shared .b32 array_shared_float[10];
-@array_shared_float = external addrspace(4) global [10 x float]
-
-;CHECK: .extern .global .b64 array_double[10];
-@array_double = external global [10 x double]
-
-;CHECK: .extern .const .b64 array_constant_double[10];
-@array_constant_double = external addrspace(1) constant [10 x double]
-
-;CHECK: .extern .shared .b64 array_shared_double[10];
-@array_shared_double = external addrspace(4) global [10 x double]
-
-
-define ptx_device void @t1_u16(i16* %p, i16 %x) {
-entry:
-;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
-;CHECK: ret;
- store i16 %x, i16* %p
- ret void
-}
-
-define ptx_device void @t1_u32(i32* %p, i32 %x) {
-entry:
-;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
-;CHECK: ret;
- store i32 %x, i32* %p
- ret void
-}
-
-define ptx_device void @t1_u64(i64* %p, i64 %x) {
-entry:
-;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
-;CHECK: ret;
- store i64 %x, i64* %p
- ret void
-}
-
-define ptx_device void @t1_f32(float* %p, float %x) {
-entry:
-;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
-;CHECK: ret;
- store float %x, float* %p
- ret void
-}
-
-define ptx_device void @t1_f64(double* %p, double %x) {
-entry:
-;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
-;CHECK: ret;
- store double %x, double* %p
- ret void
-}
-
-define ptx_device void @t2_u16(i16* %p, i16 %x) {
-entry:
-;CHECK: st.global.u16 [%r{{[0-9]+}}+2], %rh{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr i16* %p, i32 1
- store i16 %x, i16* %i
- ret void
-}
-
-define ptx_device void @t2_u32(i32* %p, i32 %x) {
-entry:
-;CHECK: st.global.u32 [%r{{[0-9]+}}+4], %r{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr i32* %p, i32 1
- store i32 %x, i32* %i
- ret void
-}
-
-define ptx_device void @t2_u64(i64* %p, i64 %x) {
-entry:
-;CHECK: st.global.u64 [%r{{[0-9]+}}+8], %rd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr i64* %p, i32 1
- store i64 %x, i64* %i
- ret void
-}
-
-define ptx_device void @t2_f32(float* %p, float %x) {
-entry:
-;CHECK: st.global.f32 [%r{{[0-9]+}}+4], %f{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr float* %p, i32 1
- store float %x, float* %i
- ret void
-}
-
-define ptx_device void @t2_f64(double* %p, double %x) {
-entry:
-;CHECK: st.global.f64 [%r{{[0-9]+}}+8], %fd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr double* %p, i32 1
- store double %x, double* %i
- ret void
-}
-
-define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr i16* %p, i32 %q
- store i16 %x, i16* %i
- ret void
-}
-
-define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr i32* %p, i32 %q
- store i32 %x, i32* %i
- ret void
-}
-
-define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr i64* %p, i32 %q
- store i64 %x, i64* %i
- ret void
-}
-
-define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr float* %p, i32 %q
- store float %x, float* %i
- ret void
-}
-
-define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
-entry:
-;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
-;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
-;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr double* %p, i32 %q
- store double %x, double* %i
- ret void
-}
-
-define ptx_device void @t4_global_u16(i16 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: st.global.u16 [%r[[R0]]], %rh{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
- store i16 %x, i16* %i
- ret void
-}
-
-define ptx_device void @t4_global_u32(i32 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: st.global.u32 [%r[[R0]]], %r{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
- store i32 %x, i32* %i
- ret void
-}
-
-define ptx_device void @t4_global_u64(i64 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: st.global.u64 [%r[[R0]]], %rd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
- store i64 %x, i64* %i
- ret void
-}
-
-define ptx_device void @t4_global_f32(float %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: st.global.f32 [%r[[R0]]], %f{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
- store float %x, float* %i
- ret void
-}
-
-define ptx_device void @t4_global_f64(double %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: st.global.f64 [%r[[R0]]], %fd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
- store double %x, double* %i
- ret void
-}
-
-define ptx_device void @t4_shared_u16(i16 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
-;CHECK: st.shared.u16 [%r[[R0]]], %rh{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
- store i16 %x, i16 addrspace(4)* %i
- ret void
-}
-
-define ptx_device void @t4_shared_u32(i32 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
-;CHECK: st.shared.u32 [%r[[R0]]], %r{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
- store i32 %x, i32 addrspace(4)* %i
- ret void
-}
-
-define ptx_device void @t4_shared_u64(i64 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
-;CHECK: st.shared.u64 [%r[[R0]]], %rd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
- store i64 %x, i64 addrspace(4)* %i
- ret void
-}
-
-define ptx_device void @t4_shared_f32(float %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
-;CHECK: st.shared.f32 [%r[[R0]]], %f{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
- store float %x, float addrspace(4)* %i
- ret void
-}
-
-define ptx_device void @t4_shared_f64(double %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
-;CHECK: st.shared.f64 [%r[[R0]]], %fd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
- store double %x, double addrspace(4)* %i
- ret void
-}
-
-define ptx_device void @t5_u16(i16 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
-;CHECK: st.global.u16 [%r[[R0]]+2], %rh{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
- store i16 %x, i16* %i
- ret void
-}
-
-define ptx_device void @t5_u32(i32 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
-;CHECK: st.global.u32 [%r[[R0]]+4], %r{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
- store i32 %x, i32* %i
- ret void
-}
-
-define ptx_device void @t5_u64(i64 %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
-;CHECK: st.global.u64 [%r[[R0]]+8], %rd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
- store i64 %x, i64* %i
- ret void
-}
-
-define ptx_device void @t5_f32(float %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
-;CHECK: st.global.f32 [%r[[R0]]+4], %f{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
- store float %x, float* %i
- ret void
-}
-
-define ptx_device void @t5_f64(double %x) {
-entry:
-;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
-;CHECK: st.global.f64 [%r[[R0]]+8], %fd{{[0-9]+}};
-;CHECK: ret;
- %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
- store double %x, double* %i
- ret void
-}
diff --git a/test/CodeGen/PTX/stack-object.ll b/test/CodeGen/PTX/stack-object.ll
deleted file mode 100644
index 65f8ee2300ce..000000000000
--- a/test/CodeGen/PTX/stack-object.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
-
-define ptx_device float @stack1(float %a) {
- ; CHECK: .local .align 4 .b8 __local0[4];
- %a.2 = alloca float, align 4
- ; CHECK: st.local.f32 [__local0], %f0
- store float %a, float* %a.2
- %a.3 = load float* %a.2
- ret float %a.3
-}
-
-define ptx_device float @stack1_align8(float %a) {
- ; CHECK: .local .align 8 .b8 __local0[4];
- %a.2 = alloca float, align 8
- ; CHECK: st.local.f32 [__local0], %f0
- store float %a, float* %a.2
- %a.3 = load float* %a.2
- ret float %a.3
-}
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
deleted file mode 100644
index 7ac886ad645e..000000000000
--- a/test/CodeGen/PTX/sub.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc < %s -march=ptx32 | FileCheck %s
-
-define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
-; CHECK: sub.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
-; CHECK: ret;
- %z = sub i16 %x, %y
- ret i16 %z
-}
-
-define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
-; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
-; CHECK: ret;
- %z = sub i32 %x, %y
- ret i32 %z
-}
-
-define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
-; CHECK: sub.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
-; CHECK: ret;
- %z = sub i64 %x, %y
- ret i64 %z
-}
-
-define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: sub.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret;
- %z = fsub float %x, %y
- ret float %z
-}
-
-define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: sub.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
-; CHECK: ret;
- %z = fsub double %x, %y
- ret double %z
-}
-
-define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, -1;
-; CHECK: ret;
- %z = sub i16 %x, 1
- ret i16 %z
-}
-
-define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, -1;
-; CHECK: ret;
- %z = sub i32 %x, 1
- ret i32 %z
-}
-
-define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, -1;
-; CHECK: ret;
- %z = sub i64 %x, 1
- ret i64 %z
-}
-
-define ptx_device float @t2_f32(float %x) {
-; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0DBFF0000000000000;
-; CHECK: ret;
- %z = fsub float %x, 1.0
- ret float %z
-}
-
-define ptx_device double @t2_f64(double %x) {
-; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0DBFF0000000000000;
-; CHECK: ret;
- %z = fsub double %x, 1.0
- ret double %z
-}
diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
index 5d1df468a66d..43736601fe27 100644
--- a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
+++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
@@ -1,7 +1,7 @@
; This function should have exactly one call to fixdfdi, no more!
; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
-; RUN: grep {bl .*fixdfdi} | count 1
+; RUN: grep "bl .*fixdfdi" | count 1
define double @test2(double %tmp.7705) {
%mem_tmp.2.0.in = fptosi double %tmp.7705 to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
index 97bb48e96e56..aeb28af4be52 100644
--- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | not grep {, f1}
+; RUN: llc < %s | not grep ", f1"
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
index 969772ee2bee..7e845382a8e8 100644
--- a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
+++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
-; RUN: grep {vspltish v.*, 10}
+; RUN: grep "vspltish v.*, 10"
define void @test(<8 x i16>* %P) {
%tmp = load <8 x i16>* %P ; <<8 x i16>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
index 86fd9475029d..73736c57fea6 100644
--- a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
+++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep "foo r3, r4"
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep "bari r3, 47"
; PR1351
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index 3489477e4ce4..53231b4f435e 100644
--- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s | FileCheck %s
-; RUN: llc < %s -regalloc=fast | FileCheck %s
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 | FileCheck %s
; The first argument of subfc must not be the same as any other register.
; CHECK: subfc [[REG:r.]],
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index c1415510d05f..382ba1f6a82d 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 2
-; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 2
-; RUN: llc < %s -march=ppc32 -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llc < %s -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | count 1
-; Check that tail merging is not the default on ppc, and that -enable-tail-merge works.
+; RUN: llc < %s -march=ppc32 -enable-tail-merge=0 | grep bl.*baz | count 2
+; RUN: llc < %s -march=ppc32 -enable-tail-merge=0 | grep bl.*quux | count 2
+; RUN: llc < %s -march=ppc32 | grep bl.*baz | count 1
+; RUN: llc < %s -march=ppc32 | grep bl.*quux | count 1
+; Check that tail merging is the default on ppc, and that -enable-tail-merge works.
; ModuleID = 'tail.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
index 72e93a9cced1..b85792c6f4f6 100644
--- a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
+++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -1,7 +1,7 @@
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
-; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
+; RUN: llc < %s -march=ppc32 | grep "rlwinm r3, r3, 23, 30, 30"
; PR1473
define zeroext i8 @foo(i16 zeroext %a) {
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index 556a4a1c4023..a60d11c85c55 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -optimize-regalloc=0 -relocation-model=pic
%struct.NSError = type opaque
%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index b3b928046748..3d1a328ec3c1 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -optimize-regalloc=0 -relocation-model=pic
%struct.NSError = type opaque
%struct.NSManagedObjectContext = type opaque
diff --git a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index e03bd9e2792c..e28a3e04cf1b 100644
--- a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=fast
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=fast -optimize-regalloc=0
define i32 @bork(i64 %foo, i64 %bar) {
entry:
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 6a3c440bc9e7..84aa40c4b52a 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 -disable-ppc-ilp-pref | FileCheck %s
; ModuleID = '<stdin>'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin10.0"
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index 6b31397138c2..0003a17c2284 100644
--- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=g5 -mtriple=powerpc-apple-darwin10.0 | FileCheck %s
; ModuleID = 'nn.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin11.0"
@@ -9,7 +9,9 @@ target triple = "powerpc-apple-darwin11.0"
define void @foo() nounwind ssp {
entry:
-; CHECK: mtctr r12
+; Better: mtctr r12
+; CHECK: mr r12, [[REG:r[0-9]+]]
+; CHECK: mtctr [[REG]]
%0 = load void (...)** @p, align 4 ; <void (...)*> [#uses=1]
call void (...)* %0() nounwind
br label %return
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
index 6161b55edee9..47d985c5f755 100644
--- a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
; ModuleID = 'tsc.c'
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll
index c2e1d6bddc5d..7b1c464f9e7d 100644
--- a/test/CodeGen/PowerPC/Frames-leaf.ll
+++ b/test/CodeGen/PowerPC/Frames-leaf.ll
@@ -1,35 +1,35 @@
; RUN: llc < %s -march=ppc32 | \
-; RUN: not grep {stw r31, 20(r1)}
+; RUN: not grep "stw r31, 20(r1)"
; RUN: llc < %s -march=ppc32 | \
-; RUN: not grep {stwu r1, -.*(r1)}
+; RUN: not grep "stwu r1, -.*(r1)"
; RUN: llc < %s -march=ppc32 | \
-; RUN: not grep {addi r1, r1, }
+; RUN: not grep "addi r1, r1, "
; RUN: llc < %s -march=ppc32 | \
-; RUN: not grep {lwz r31, 20(r1)}
+; RUN: not grep "lwz r31, 20(r1)"
; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN: not grep {stw r31, 20(r1)}
+; RUN: not grep "stw r31, 20(r1)"
; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN: not grep {stwu r1, -.*(r1)}
+; RUN: not grep "stwu r1, -.*(r1)"
; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN: not grep {addi r1, r1, }
+; RUN: not grep "addi r1, r1, "
; RUN: llc < %s -march=ppc32 -disable-fp-elim | \
-; RUN: not grep {lwz r31, 20(r1)}
+; RUN: not grep "lwz r31, 20(r1)"
; RUN: llc < %s -march=ppc64 | \
-; RUN: not grep {std r31, 40(r1)}
+; RUN: not grep "std r31, 40(r1)"
; RUN: llc < %s -march=ppc64 | \
-; RUN: not grep {stdu r1, -.*(r1)}
+; RUN: not grep "stdu r1, -.*(r1)"
; RUN: llc < %s -march=ppc64 | \
-; RUN: not grep {addi r1, r1, }
+; RUN: not grep "addi r1, r1, "
; RUN: llc < %s -march=ppc64 | \
-; RUN: not grep {ld r31, 40(r1)}
+; RUN: not grep "ld r31, 40(r1)"
; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN: not grep {stw r31, 40(r1)}
+; RUN: not grep "stw r31, 40(r1)"
; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN: not grep {stdu r1, -.*(r1)}
+; RUN: not grep "stdu r1, -.*(r1)"
; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN: not grep {addi r1, r1, }
+; RUN: not grep "addi r1, r1, "
; RUN: llc < %s -march=ppc64 -disable-fp-elim | \
-; RUN: not grep {ld r31, 40(r1)}
+; RUN: not grep "ld r31, 40(r1)"
define i32* @f1() {
%tmp = alloca i32, i32 2 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index ecd5ecd2eca1..0f6bd1021f80 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,26 +1,26 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
-; RUN: not grep {stw r31, -4(r1)} %t1
-; RUN: grep {stwu r1, -16448(r1)} %t1
-; RUN: grep {addi r1, r1, 16448} %t1
+; RUN: not grep "stw r31, -4(r1)" %t1
+; RUN: grep "stwu r1, -16448(r1)" %t1
+; RUN: grep "addi r1, r1, 16448" %t1
; RUN: llc < %s -march=ppc32 | \
-; RUN: not grep {lwz r31, -4(r1)}
+; RUN: not grep "lwz r31, -4(r1)"
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
; RUN: -o %t2
-; RUN: grep {stw r31, -4(r1)} %t2
-; RUN: grep {stwu r1, -16448(r1)} %t2
-; RUN: grep {addi r1, r1, 16448} %t2
-; RUN: grep {lwz r31, -4(r1)} %t2
+; RUN: grep "stw r31, -4(r1)" %t2
+; RUN: grep "stwu r1, -16448(r1)" %t2
+; RUN: grep "addi r1, r1, 16448" %t2
+; RUN: grep "lwz r31, -4(r1)" %t2
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3
-; RUN: not grep {std r31, -8(r1)} %t3
-; RUN: grep {stdu r1, -16496(r1)} %t3
-; RUN: grep {addi r1, r1, 16496} %t3
-; RUN: not grep {ld r31, -8(r1)} %t3
+; RUN: not grep "std r31, -8(r1)" %t3
+; RUN: grep "stdu r1, -16496(r1)" %t3
+; RUN: grep "addi r1, r1, 16496" %t3
+; RUN: not grep "ld r31, -8(r1)" %t3
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
; RUN: -o %t4
-; RUN: grep {std r31, -8(r1)} %t4
-; RUN: grep {stdu r1, -16512(r1)} %t4
-; RUN: grep {addi r1, r1, 16512} %t4
-; RUN: grep {ld r31, -8(r1)} %t4
+; RUN: grep "std r31, -8(r1)" %t4
+; RUN: grep "stdu r1, -16512(r1)" %t4
+; RUN: grep "addi r1, r1, 16512" %t4
+; RUN: grep "ld r31, -8(r1)" %t4
define i32* @f1() {
%tmp = alloca i32, i32 4095 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 7b0d69cb3bea..6f985c819fb6 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN: grep {stw r4, 32751}
+; RUN: grep "stw r4, 32751"
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep {stw r4, 32751}
+; RUN: grep "stw r4, 32751"
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep {std r4, 9024}
+; RUN: grep "std r4, 9024"
define void @test() nounwind {
store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
index 932ad7a63ce4..de3aa7c31766 100644
--- a/test/CodeGen/PowerPC/a2-fp-basic.ll
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -fp-contract=fast | FileCheck %s
%0 = type { double, double }
diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll
index 64a45e50c0a9..6fd484b40b6e 100644
--- a/test/CodeGen/PowerPC/and-imm.ll
+++ b/test/CodeGen/PowerPC/and-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | not grep {ori\\|lis}
+; RUN: llc < %s -march=ppc32 | not grep "ori\|lis"
; andi. r3, r3, 32769
define i32 @test(i32 %X) {
diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll
index 009f46811e78..898ad7cb857d 100644
--- a/test/CodeGen/PowerPC/big-endian-actual-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN: grep {addc 4, 4, 6}
+; RUN: grep "addc 4, 4, 6"
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN: grep {adde 3, 3, 5}
+; RUN: grep "adde 3, 3, 5"
define i64 @foo(i64 %x, i64 %y) {
%z = add i64 %x, %y
diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll
index fe85404cb94f..760833ce20ff 100644
--- a/test/CodeGen/PowerPC/big-endian-call-result.ll
+++ b/test/CodeGen/PowerPC/big-endian-call-result.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN: grep {addic 4, 4, 1}
+; RUN: grep "addic 4, 4, 1"
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
-; RUN: grep {addze 3, 3}
+; RUN: grep "addze 3, 3"
declare i64 @foo()
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
index cc02e406aa61..dda1538f1cdf 100644
--- a/test/CodeGen/PowerPC/branch-opt.ll
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=ppc32 | \
-; RUN: grep {b LBB.*} | count 4
+; RUN: grep "b LBB.*" | count 4
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.7.0"
diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll
index 29bcb2081188..dcdda5721464 100644
--- a/test/CodeGen/PowerPC/calls.ll
+++ b/test/CodeGen/PowerPC/calls.ll
@@ -1,11 +1,11 @@
; Test various forms of calls.
; RUN: llc < %s -march=ppc32 | \
-; RUN: grep {bl } | count 1
+; RUN: grep "bl " | count 1
; RUN: llc < %s -march=ppc32 | \
-; RUN: grep {bctrl} | count 1
+; RUN: grep "bctrl" | count 1
; RUN: llc < %s -march=ppc32 | \
-; RUN: grep {bla } | count 1
+; RUN: grep "bla " | count 1
declare void @foo()
diff --git a/test/CodeGen/PowerPC/coalesce-ext.ll b/test/CodeGen/PowerPC/coalesce-ext.ll
new file mode 100644
index 000000000000..cc80f8330798
--- /dev/null
+++ b/test/CodeGen/PowerPC/coalesce-ext.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=ppc64 -mtriple=powerpc64-apple-darwin < %s | FileCheck %s
+; Check that the peephole optimizer knows about sext and zext instructions.
+; CHECK: test1sext
+define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+ %C = add i64 %A, %B
+ ; CHECK: add [[SUM:r[0-9]+]], r3, r4
+ %D = trunc i64 %C to i32
+ %E = shl i64 %C, 32
+ %F = ashr i64 %E, 32
+ ; CHECK: extsw [[EXT:r[0-9]+]], [[SUM]]
+ store volatile i64 %F, i64 *%P2
+ ; CHECK: std [[EXT]]
+ store volatile i32 %D, i32* %P
+ ; Reuse low bits of extended register, don't extend live range of SUM.
+ ; CHECK: stw [[EXT]]
+ ret i32 %D
+}
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index 92d1dbe902a1..94c5c0290f58 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {cmpwi cr0, r3, -1}
+; RUN: grep "cmpwi cr0, r3, -1"
define i32 @test(i32 %x) nounwind {
%c = icmp eq i32 %x, -1
diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll
index 8901e02d3b80..9efca916d69c 100644
--- a/test/CodeGen/PowerPC/constants.ll
+++ b/test/CodeGen/PowerPC/constants.ll
@@ -4,7 +4,7 @@
; RUN: llc < %s -march=ppc32 | \
; RUN: grep ori | count 3
; RUN: llc < %s -march=ppc32 | \
-; RUN: grep {li } | count 4
+; RUN: grep "li " | count 4
define i32 @f1() {
entry:
diff --git a/test/CodeGen/PowerPC/ctrloop-reg.ll b/test/CodeGen/PowerPC/ctrloop-reg.ll
new file mode 100644
index 000000000000..874e5712bb99
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-reg.ll
@@ -0,0 +1,87 @@
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211 = type { %union.v.0.48.90.114.120.138.144.150.156.162.168.174.180.210, i16, i16 }
+%union.v.0.48.90.114.120.138.144.150.156.162.168.174.180.210 = type { i64 }
+%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215 = type { i8*, i8*, i8*, i32, i8, i8, i64, %struct.stream_procs.2.50.92.116.122.140.146.152.158.164.170.176.182.212, i32, %struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214*, %struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i16, i32 }
+%struct.stream_procs.2.50.92.116.122.140.146.152.158.164.170.176.182.212 = type { i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i8)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i64*)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i64)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*)*, i32 (%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*)* }
+%struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.3.51.93.117.123.141.147.153.159.165.171.177.183.213*, %struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker.3.51.93.117.123.141.147.153.159.165.171.177.183.213 = type { %struct._IO_marker.3.51.93.117.123.141.147.153.159.165.171.177.183.213*, %struct._IO_FILE.4.52.94.118.124.142.148.154.160.166.172.178.184.214*, i32 }
+
+@special_ops = external global [7 x i32 (%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)*], align 8
+@ostack = external global [520 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211], align 8
+@osbot = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@osp = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@ostop = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@osp_nargs = external global [6 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*], align 8
+@estack = external global [150 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211], align 8
+@esp = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@estop = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@dstack = external global [20 x %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211], align 8
+@dsp = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@dstop = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, align 8
+@name_errordict = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211
+@name_ErrorNames = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211
+@error_object = external global %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211, align 8
+
+declare i32 @zadd(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zdup(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zexch(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zifelse(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zle(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zpop(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare i32 @zsub(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*)
+
+declare void @interp_init(i32) nounwind
+
+declare void @interp_fix_op(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* nocapture) nounwind
+
+define i32 @interpret(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* %pref, i32 %user_errors) nounwind {
+entry:
+ %erref = alloca %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211, align 8
+ br i1 undef, label %retry.us, label %retry
+
+retry.us: ; preds = %if.end18, %retry, %retry, %retry, %retry, %entry
+ ret i32 undef
+
+retry: ; preds = %if.end18, %entry
+ %0 = phi %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* [ null, %entry ], [ %erref, %if.end18 ]
+ %call = call i32 @interp(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* %0)
+ switch i32 %call, label %if.end18 [
+ i32 -3, label %retry.us
+ i32 -5, label %retry.us
+ i32 -16, label %retry.us
+ i32 -25, label %retry.us
+ ]
+
+if.end18: ; preds = %retry
+ br i1 false, label %retry.us, label %retry
+}
+
+; CHECK: @interpret
+
+declare i32 @interp_exit(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211* nocapture) nounwind readnone
+
+declare i32 @interp(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*) nounwind
+
+declare i32 @dict_lookup(%struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211*, %struct.ref_s.1.49.91.115.121.139.145.151.157.163.169.175.181.211**)
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @obj_compare(...)
+
+declare i32 @file_check_read(...)
+
+declare i32 @scan_token(...)
+
+declare i32 @file_close(...)
+
+declare void @sread_string(%struct.stream_s.5.53.95.119.125.143.149.155.161.167.173.179.185.215*, i8*, i32)
diff --git a/test/CodeGen/PowerPC/ctrloop-s000.ll b/test/CodeGen/PowerPC/ctrloop-s000.ll
new file mode 100644
index 000000000000..dcea06f29e7c
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-s000.ll
@@ -0,0 +1,156 @@
+; ModuleID = 'tsc_s000.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@Y = common global [16000 x double] zeroinitializer, align 32
+@X = common global [16000 x double] zeroinitializer, align 32
+@Z = common global [16000 x double] zeroinitializer, align 32
+@U = common global [16000 x double] zeroinitializer, align 32
+@V = common global [16000 x double] zeroinitializer, align 32
+@aa = common global [256 x [256 x double]] zeroinitializer, align 32
+@bb = common global [256 x [256 x double]] zeroinitializer, align 32
+@cc = common global [256 x [256 x double]] zeroinitializer, align 32
+@array = common global [65536 x double] zeroinitializer, align 32
+@x = common global [16000 x double] zeroinitializer, align 32
+@temp = common global double 0.000000e+00, align 8
+@temp_int = common global i32 0, align 4
+@a = common global [16000 x double] zeroinitializer, align 32
+@b = common global [16000 x double] zeroinitializer, align 32
+@c = common global [16000 x double] zeroinitializer, align 32
+@d = common global [16000 x double] zeroinitializer, align 32
+@e = common global [16000 x double] zeroinitializer, align 32
+@tt = common global [256 x [256 x double]] zeroinitializer, align 32
+@indx = common global [16000 x i32] zeroinitializer, align 32
+@xx = common global double* null, align 8
+@yy = common global double* null, align 8
+
+define i32 @s000() nounwind {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.end, %entry
+ %nl.010 = phi i32 [ 0, %entry ], [ %inc7, %for.end ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ]
+ %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 32, !tbaa !0
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
+ store double %add, double* %arrayidx5, align 32, !tbaa !0
+ %indvars.iv.next11 = or i64 %indvars.iv, 1
+ %arrayidx.1 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11
+ %1 = load double* %arrayidx.1, align 8, !tbaa !0
+ %add.1 = fadd double %1, 1.000000e+00
+ %arrayidx5.1 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next11
+ store double %add.1, double* %arrayidx5.1, align 8, !tbaa !0
+ %indvars.iv.next.112 = or i64 %indvars.iv, 2
+ %arrayidx.2 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112
+ %2 = load double* %arrayidx.2, align 16, !tbaa !0
+ %add.2 = fadd double %2, 1.000000e+00
+ %arrayidx5.2 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112
+ store double %add.2, double* %arrayidx5.2, align 16, !tbaa !0
+ %indvars.iv.next.213 = or i64 %indvars.iv, 3
+ %arrayidx.3 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213
+ %3 = load double* %arrayidx.3, align 8, !tbaa !0
+ %add.3 = fadd double %3, 1.000000e+00
+ %arrayidx5.3 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213
+ store double %add.3, double* %arrayidx5.3, align 8, !tbaa !0
+ %indvars.iv.next.314 = or i64 %indvars.iv, 4
+ %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314
+ %4 = load double* %arrayidx.4, align 32, !tbaa !0
+ %add.4 = fadd double %4, 1.000000e+00
+ %arrayidx5.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314
+ store double %add.4, double* %arrayidx5.4, align 32, !tbaa !0
+ %indvars.iv.next.415 = or i64 %indvars.iv, 5
+ %arrayidx.5 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415
+ %5 = load double* %arrayidx.5, align 8, !tbaa !0
+ %add.5 = fadd double %5, 1.000000e+00
+ %arrayidx5.5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415
+ store double %add.5, double* %arrayidx5.5, align 8, !tbaa !0
+ %indvars.iv.next.516 = or i64 %indvars.iv, 6
+ %arrayidx.6 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516
+ %6 = load double* %arrayidx.6, align 16, !tbaa !0
+ %add.6 = fadd double %6, 1.000000e+00
+ %arrayidx5.6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516
+ store double %add.6, double* %arrayidx5.6, align 16, !tbaa !0
+ %indvars.iv.next.617 = or i64 %indvars.iv, 7
+ %arrayidx.7 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617
+ %7 = load double* %arrayidx.7, align 8, !tbaa !0
+ %add.7 = fadd double %7, 1.000000e+00
+ %arrayidx5.7 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617
+ store double %add.7, double* %arrayidx5.7, align 8, !tbaa !0
+ %indvars.iv.next.718 = or i64 %indvars.iv, 8
+ %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718
+ %8 = load double* %arrayidx.8, align 32, !tbaa !0
+ %add.8 = fadd double %8, 1.000000e+00
+ %arrayidx5.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718
+ store double %add.8, double* %arrayidx5.8, align 32, !tbaa !0
+ %indvars.iv.next.819 = or i64 %indvars.iv, 9
+ %arrayidx.9 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819
+ %9 = load double* %arrayidx.9, align 8, !tbaa !0
+ %add.9 = fadd double %9, 1.000000e+00
+ %arrayidx5.9 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819
+ store double %add.9, double* %arrayidx5.9, align 8, !tbaa !0
+ %indvars.iv.next.920 = or i64 %indvars.iv, 10
+ %arrayidx.10 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920
+ %10 = load double* %arrayidx.10, align 16, !tbaa !0
+ %add.10 = fadd double %10, 1.000000e+00
+ %arrayidx5.10 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920
+ store double %add.10, double* %arrayidx5.10, align 16, !tbaa !0
+ %indvars.iv.next.1021 = or i64 %indvars.iv, 11
+ %arrayidx.11 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021
+ %11 = load double* %arrayidx.11, align 8, !tbaa !0
+ %add.11 = fadd double %11, 1.000000e+00
+ %arrayidx5.11 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021
+ store double %add.11, double* %arrayidx5.11, align 8, !tbaa !0
+ %indvars.iv.next.1122 = or i64 %indvars.iv, 12
+ %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122
+ %12 = load double* %arrayidx.12, align 32, !tbaa !0
+ %add.12 = fadd double %12, 1.000000e+00
+ %arrayidx5.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122
+ store double %add.12, double* %arrayidx5.12, align 32, !tbaa !0
+ %indvars.iv.next.1223 = or i64 %indvars.iv, 13
+ %arrayidx.13 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223
+ %13 = load double* %arrayidx.13, align 8, !tbaa !0
+ %add.13 = fadd double %13, 1.000000e+00
+ %arrayidx5.13 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223
+ store double %add.13, double* %arrayidx5.13, align 8, !tbaa !0
+ %indvars.iv.next.1324 = or i64 %indvars.iv, 14
+ %arrayidx.14 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324
+ %14 = load double* %arrayidx.14, align 16, !tbaa !0
+ %add.14 = fadd double %14, 1.000000e+00
+ %arrayidx5.14 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324
+ store double %add.14, double* %arrayidx5.14, align 16, !tbaa !0
+ %indvars.iv.next.1425 = or i64 %indvars.iv, 15
+ %arrayidx.15 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425
+ %15 = load double* %arrayidx.15, align 8, !tbaa !0
+ %add.15 = fadd double %15, 1.000000e+00
+ %arrayidx5.15 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425
+ store double %add.15, double* %arrayidx5.15, align 8, !tbaa !0
+ %indvars.iv.next.15 = add i64 %indvars.iv, 16
+ %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+ %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
+ br i1 %exitcond.15, label %for.end, label %for.body3
+
+for.end: ; preds = %for.body3
+ %call = tail call i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
+ %inc7 = add nsw i32 %nl.010, 1
+ %exitcond = icmp eq i32 %inc7, 400000
+ br i1 %exitcond, label %for.end8, label %for.cond1.preheader
+
+for.end8: ; preds = %for.end
+ ret i32 0
+
+; CHECK: @s000
+; CHECK: mtctr
+; CHECK: bdnz
+}
+
+declare i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/ctrloop-sums.ll b/test/CodeGen/PowerPC/ctrloop-sums.ll
new file mode 100644
index 000000000000..eae8c38eee0e
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-sums.ll
@@ -0,0 +1,134 @@
+; ModuleID = 'SingleSource/Regression/C/sumarray2d.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@.str = private unnamed_addr constant [23 x i8] c"Sum(Array[%d,%d] = %d\0A\00", align 1
+
+define i32 @SumArray([100 x i32]* nocapture %Array, i32 %NumI, i32 %NumJ) nounwind readonly {
+entry:
+ %cmp12 = icmp eq i32 %NumI, 0
+ br i1 %cmp12, label %for.end8, label %for.cond1.preheader.lr.ph
+
+for.cond1.preheader.lr.ph: ; preds = %entry
+ %cmp29 = icmp eq i32 %NumJ, 0
+ br i1 %cmp29, label %for.inc6, label %for.body3.lr.ph.us
+
+for.inc6.us: ; preds = %for.body3.us
+ %indvars.iv.next17 = add i64 %indvars.iv16, 1
+ %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32
+ %exitcond19 = icmp eq i32 %lftr.wideiv18, %NumI
+ br i1 %exitcond19, label %for.end8, label %for.body3.lr.ph.us
+
+for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
+ %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
+ %Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, %for.body3.us ]
+ %arrayidx5.us = getelementptr inbounds [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv
+ %0 = load i32* %arrayidx5.us, align 4, !tbaa !0
+ %add.us = add nsw i32 %0, %Result.111.us
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %NumJ
+ br i1 %exitcond, label %for.inc6.us, label %for.body3.us
+
+for.body3.lr.ph.us: ; preds = %for.inc6.us, %for.cond1.preheader.lr.ph
+ %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ]
+ %Result.014.us = phi i32 [ %add.us, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ]
+ br label %for.body3.us
+
+for.inc6: ; preds = %for.inc6, %for.cond1.preheader.lr.ph
+ %i.013 = phi i32 [ %inc7, %for.inc6 ], [ 0, %for.cond1.preheader.lr.ph ]
+ %inc7 = add i32 %i.013, 1
+ %exitcond20 = icmp eq i32 %inc7, %NumI
+ br i1 %exitcond20, label %for.end8, label %for.inc6
+
+for.end8: ; preds = %for.inc6.us, %for.inc6, %entry
+ %Result.0.lcssa = phi i32 [ 0, %entry ], [ %add.us, %for.inc6.us ], [ 0, %for.inc6 ]
+ ret i32 %Result.0.lcssa
+; CHECK: @SumArray
+; CHECK: mtctr
+; CHECK: bdnz
+}
+
+define i32 @main() nounwind {
+entry:
+ %Array = alloca [100 x [100 x i32]], align 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv33 = phi i64 [ 0, %entry ], [ %indvars.iv.next34, %for.body ]
+ %0 = trunc i64 %indvars.iv33 to i32
+ %sub = sub i32 0, %0
+ %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33
+ store i32 %sub, i32* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next34 = add i64 %indvars.iv33, 1
+ %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
+ %exitcond36 = icmp eq i32 %lftr.wideiv35, 100
+ br i1 %exitcond36, label %for.cond6.preheader, label %for.body
+
+for.cond6.preheader: ; preds = %for.body, %for.inc17
+ %indvars.iv29 = phi i64 [ %indvars.iv.next30, %for.inc17 ], [ 0, %for.body ]
+ br label %for.body8
+
+for.body8: ; preds = %for.inc14, %for.cond6.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond6.preheader ], [ %indvars.iv.next, %for.inc14 ]
+ %1 = trunc i64 %indvars.iv to i32
+ %2 = trunc i64 %indvars.iv29 to i32
+ %cmp9 = icmp eq i32 %1, %2
+ br i1 %cmp9, label %for.inc14, label %if.then
+
+if.then: ; preds = %for.body8
+ %3 = add i64 %indvars.iv, %indvars.iv29
+ %arrayidx13 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv
+ %4 = trunc i64 %3 to i32
+ store i32 %4, i32* %arrayidx13, align 4, !tbaa !0
+ br label %for.inc14
+
+for.inc14: ; preds = %for.body8, %if.then
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv27 = trunc i64 %indvars.iv.next to i32
+ %exitcond28 = icmp eq i32 %lftr.wideiv27, 100
+ br i1 %exitcond28, label %for.inc17, label %for.body8
+
+for.inc17: ; preds = %for.inc14
+ %indvars.iv.next30 = add i64 %indvars.iv29, 1
+ %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
+ %exitcond32 = icmp eq i32 %lftr.wideiv31, 100
+ br i1 %exitcond32, label %for.body3.lr.ph.us.i, label %for.cond6.preheader
+
+for.inc6.us.i: ; preds = %for.body3.us.i
+ %indvars.iv.next17.i = add i64 %indvars.iv16.i, 1
+ %lftr.wideiv24 = trunc i64 %indvars.iv.next17.i to i32
+ %exitcond25 = icmp eq i32 %lftr.wideiv24, 100
+ br i1 %exitcond25, label %SumArray.exit, label %for.body3.lr.ph.us.i
+
+for.body3.us.i: ; preds = %for.body3.lr.ph.us.i, %for.body3.us.i
+ %indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ]
+ %Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ]
+ %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i
+ %5 = load i32* %arrayidx5.us.i, align 4, !tbaa !0
+ %add.us.i = add nsw i32 %5, %Result.111.us.i
+ %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 100
+ br i1 %exitcond, label %for.inc6.us.i, label %for.body3.us.i
+
+for.body3.lr.ph.us.i: ; preds = %for.inc17, %for.inc6.us.i
+ %indvars.iv16.i = phi i64 [ %indvars.iv.next17.i, %for.inc6.us.i ], [ 0, %for.inc17 ]
+ %Result.014.us.i = phi i32 [ %add.us.i, %for.inc6.us.i ], [ 0, %for.inc17 ]
+ br label %for.body3.us.i
+
+SumArray.exit: ; preds = %for.inc6.us.i
+ %call20 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind
+ ret i32 0
+
+; CHECK: @main
+; CHECK: mtctr
+; CHECK: bdnz
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/ctrloops.ll b/test/CodeGen/PowerPC/ctrloops.ll
new file mode 100644
index 000000000000..4b6f7b94af4a
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloops.ll
@@ -0,0 +1,79 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@a = common global i32 0, align 4
+
+define void @test1(i32 %c) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load volatile i32* @a, align 4, !tbaa !0
+ %add = add nsw i32 %0, %c
+ store volatile i32 %add, i32* @a, align 4, !tbaa !0
+ %inc = add nsw i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 2048
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+; CHECK: @test1
+; CHECK-NOT: or 3, 3, 3
+; CHECK: mtctr
+; CHECK-NOT: addi
+; CHECK-NOT: cmplwi
+; CHECK: bdnz
+}
+
+define void @test2(i32 %c, i32 %d) nounwind {
+entry:
+ %cmp1 = icmp sgt i32 %d, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %0 = load volatile i32* @a, align 4, !tbaa !0
+ %add = add nsw i32 %0, %c
+ store volatile i32 %add, i32* @a, align 4, !tbaa !0
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, %d
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+; CHECK: @test2
+; CHECK: mtctr
+; CHECK-NOT: addi
+; CHECK-NOT: cmplwi
+; CHECK: bdnz
+}
+
+define void @test3(i32 %c, i32 %d) nounwind {
+entry:
+ %cmp1 = icmp sgt i32 %d, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %mul = mul nsw i32 %i.02, %c
+ %0 = load volatile i32* @a, align 4, !tbaa !0
+ %add = add nsw i32 %0, %mul
+ store volatile i32 %add, i32* @a, align 4, !tbaa !0
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, %d
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+; CHECK: @test3
+; CHECK: mtctr
+; CHECK-NOT: addi
+; CHECK-NOT: cmplwi
+; CHECK: bdnz
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll
index af233697403d..56f7782138a6 100644
--- a/test/CodeGen/PowerPC/darwin-labels.ll
+++ b/test/CodeGen/PowerPC/darwin-labels.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {foo bar":}
+; RUN: llc < %s | grep 'foo bar":'
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll
index 6ef740f835cb..ddcce745084a 100644
--- a/test/CodeGen/PowerPC/fabs.ll
+++ b/test/CodeGen/PowerPC/fabs.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1}
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep "fabs f1, f1"
define double @fabs(double %f) {
entry:
- %tmp2 = tail call double @fabs( double %f ) ; <double> [#uses=1]
+ %tmp2 = tail call double @fabs( double %f ) readnone ; <double> [#uses=1]
ret double %tmp2
}
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 815c72c1f8a7..27496f7937e6 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 | \
-; RUN: egrep {fn?madd|fn?msub} | count 8
+; RUN: llc < %s -march=ppc32 -fp-contract=fast | \
+; RUN: egrep "fn?madd|fn?msub" | count 8
define double @test_FMADD1(double %A, double %B, double %C) {
%D = fmul double %A, %B ; <double> [#uses=1]
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index bbd5c7159edc..9fa2dcb2909c 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -3,7 +3,7 @@
declare double @fabs(double)
define double @test(double %X) {
- %Y = call double @fabs( double %X ) ; <double> [#uses=1]
+ %Y = call double @fabs( double %X ) readnone ; <double> [#uses=1]
%Z = fsub double -0.000000e+00, %Y ; <double> [#uses=1]
ret double %Z
}
diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll
index 74a8725eb12e..bf8c4a22c95f 100644
--- a/test/CodeGen/PowerPC/fsqrt.ll
+++ b/test/CodeGen/PowerPC/fsqrt.ll
@@ -2,13 +2,13 @@
; otherwise.
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
-; RUN: grep {fsqrt f1, f1}
+; RUN: grep "fsqrt f1, f1"
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
-; RUN: grep {fsqrt f1, f1}
+; RUN: grep "fsqrt f1, f1"
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
-; RUN: not grep {fsqrt f1, f1}
+; RUN: not grep "fsqrt f1, f1"
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
-; RUN: not grep {fsqrt f1, f1}
+; RUN: not grep "fsqrt f1, f1"
declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index a43f09c7d561..7d089bbd653c 100644
--- a/test/CodeGen/PowerPC/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 -stats |& \
-; RUN: grep {4 .*Number of machine instrs printed}
+; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
+; RUN: grep "4 .*Number of machine instrs printed"
;; Integer absolute value, should produce something as good as:
;; srawi r2, r3, 31
diff --git a/test/CodeGen/PowerPC/isel.ll b/test/CodeGen/PowerPC/isel.ll
new file mode 100644
index 000000000000..ed494c57d344
--- /dev/null
+++ b/test/CodeGen/PowerPC/isel.ll
@@ -0,0 +1,23 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+
+define i64 @test1(i64 %a, i64 %b, i64 %c, i64 %d) {
+entry:
+ %p = icmp uge i64 %a, %b
+ %x = select i1 %p, i64 %c, i64 %d
+ ret i64 %x
+; CHECK: @test1
+; CHECK: isel
+}
+
+define i32 @test2(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+ %p = icmp uge i32 %a, %b
+ %x = select i1 %p, i32 %c, i32 %d
+ ret i32 %x
+; CHECK: @test2
+; CHECK: isel
+}
+
diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll
index 4161e3438a4b..78cdf4a4d902 100644
--- a/test/CodeGen/PowerPC/ispositive.ll
+++ b/test/CodeGen/PowerPC/ispositive.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {srwi r3, r3, 31}
+; RUN: grep "srwi r3, r3, 31"
define i32 @test1(i32 %X) {
entry:
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
new file mode 100644
index 000000000000..12f1d1f130d8
--- /dev/null
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -0,0 +1,49 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
+entry:
+ %0 = load i8** undef, align 8, !tbaa !0
+ br i1 undef, label %return, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ br i1 undef, label %if.end7, label %return
+
+if.end7: ; preds = %lor.lhs.false
+ br i1 undef, label %if.then15, label %if.end71
+
+if.then15: ; preds = %if.end7
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %if.then15
+ %idxprom17 = sext i32 0 to i64
+ %arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17
+ %or = or i32 undef, undef
+ br i1 %cond1, label %if.end71, label %while.body
+
+while.body: ; preds = %while.cond
+ br i1 %cond2, label %while.cond, label %if.then45
+
+if.then45: ; preds = %while.body
+ %idxprom48139 = zext i32 %or to i64
+ %arrayidx49 = getelementptr inbounds i8* %0, i64 %idxprom48139
+ %1 = bitcast i8* %arrayidx49 to i16*
+ %2 = bitcast i8* %arrayidx18 to i16*
+ %3 = load i16* %1, align 1
+ store i16 %3, i16* %2, align 1
+ br label %return
+
+if.end71: ; preds = %while.cond, %if.end7
+ unreachable
+
+return: ; preds = %if.then45, %lor.lhs.false, %entry
+ ret void
+
+; CHECK: @allocateSpace
+; CHECK: lbzux
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
index 94c2526cf5b9..915595f6dbac 100644
--- a/test/CodeGen/PowerPC/long-compare.ll
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 | grep cntlzw
; RUN: llc < %s -march=ppc32 | not grep xori
-; RUN: llc < %s -march=ppc32 | not grep {li }
-; RUN: llc < %s -march=ppc32 | not grep {mr }
+; RUN: llc < %s -march=ppc32 | not grep "li "
+; RUN: llc < %s -march=ppc32 | not grep "mr "
define i1 @test(i64 %x) {
%tmp = icmp ult i64 %x, 4294967296
diff --git a/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/test/CodeGen/PowerPC/lsr-postinc-pos.ll
index f441e42da2f1..42472c58fe8b 100644
--- a/test/CodeGen/PowerPC/lsr-postinc-pos.ll
+++ b/test/CodeGen/PowerPC/lsr-postinc-pos.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -print-lsr-output |& FileCheck %s
+; RUN: llc < %s -print-lsr-output 2>&1 | FileCheck %s
; The icmp is a post-inc use, and the increment is in %bb11, but the
; scevgep needs to be inserted in %bb so that it is dominated by %t.
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index 17e7e2849c99..39af11a3d54c 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=ppc32 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc32 | \
; RUN: not grep addi
-; RUN: llc < %s -march=ppc64 -enable-ppc-preinc | \
+; RUN: llc < %s -march=ppc64 | \
; RUN: not grep addi
@Glob = global i64 4
diff --git a/test/CodeGen/PowerPC/no-dead-strip.ll b/test/CodeGen/PowerPC/no-dead-strip.ll
index 34594132530d..6320e2812c1b 100644
--- a/test/CodeGen/PowerPC/no-dead-strip.ll
+++ b/test/CodeGen/PowerPC/no-dead-strip.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {no_dead_strip.*_X}
+; RUN: llc < %s | grep "no_dead_strip.*_X"
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "powerpc-apple-darwin8.8.0"
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
index 1fad2fa3aaf5..77b726c5ae38 100644
--- a/test/CodeGen/PowerPC/ppc440-fp-basic.ll
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 -fp-contract=fast | FileCheck %s
%0 = type { double, double }
diff --git a/test/CodeGen/PowerPC/ppc64-cyclecounter.ll b/test/CodeGen/PowerPC/ppc64-cyclecounter.ll
new file mode 100644
index 000000000000..38406cabb2df
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-cyclecounter.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define i64 @test1() nounwind {
+entry:
+ %r = call i64 @llvm.readcyclecounter()
+ ret i64 %r
+}
+
+; CHECK: @test1
+; CHECK: mfspr 3, 268
+
+declare i64 @llvm.readcyclecounter()
+
diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll
index cf16b4c26f62..c931dfe93575 100644
--- a/test/CodeGen/PowerPC/retaddr.ll
+++ b/test/CodeGen/PowerPC/retaddr.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ppc32 | grep mflr
; RUN: llc < %s -march=ppc32 | grep lwz
-; RUN: llc < %s -march=ppc64 | grep {ld r., 16(r1)}
+; RUN: llc < %s -march=ppc64 | grep "ld r., 16(r1)"
target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll
index 6410c63234d2..3f90008c006b 100644
--- a/test/CodeGen/PowerPC/rlwimi-commute.ll
+++ b/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=ppc32 | grep rlwimi
-; RUN: llc < %s -march=ppc32 | not grep {or }
+; RUN: llc < %s -march=ppc32 | not grep "or "
; Make sure there is no register-register copies here.
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index 05d37bf1625f..7efdbe9634fe 100644
--- a/test/CodeGen/PowerPC/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 -stats |& \
-; RUN: grep {Number of machine instrs printed} | grep 12
+; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
+; RUN: grep "Number of machine instrs printed" | grep 12
define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) {
%tmp1 = shl i32 %srcA, 15 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
index 688b29aa124f..731958374ee2 100644
--- a/test/CodeGen/PowerPC/seteq-0.ll
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep {srwi r., r., 5}
+; RUN: grep "srwi r., r., 5"
define i32 @eq0(i32 %a) {
%tmp.1 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
index b4767b0a291b..19ca0985eef1 100644
--- a/test/CodeGen/PowerPC/small-arguments.ll
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm}
+; RUN: llc < %s -march=ppc32 | not grep "extsh\|rlwinm"
declare signext i16 @foo()
diff --git a/test/CodeGen/PowerPC/stack-protector.ll b/test/CodeGen/PowerPC/stack-protector.ll
index 202036125026..810630f6978f 100644
--- a/test/CodeGen/PowerPC/stack-protector.ll
+++ b/test/CodeGen/PowerPC/stack-protector.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_guard}
-; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_fail}
+; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_guard"
+; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_fail"
@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/stwu-gta.ll b/test/CodeGen/PowerPC/stwu-gta.ll
new file mode 100644
index 000000000000..4febe7e2fe7e
--- /dev/null
+++ b/test/CodeGen/PowerPC/stwu-gta.ll
@@ -0,0 +1,22 @@
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux"
+; RUN: llc < %s | FileCheck %s
+
+%class.Two.0.5 = type { i32, i32, i32 }
+
+@foo = external global %class.Two.0.5, align 4
+
+define void @_GLOBAL__I_a() nounwind section ".text.startup" {
+entry:
+ store i32 5, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 0), align 4, !tbaa !0
+ store i32 6, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 1), align 4, !tbaa !0
+ ret void
+}
+
+; CHECK: @_GLOBAL__I_a
+; CHECK-NOT: stwux
+; CHECK: stwu
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
new file mode 100644
index 000000000000..897bfc6d6caa
--- /dev/null
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%class.spell_checker.21.103.513.538 = type { %"class.std::map.20.102.512.537" }
+%"class.std::map.20.102.512.537" = type { %"class.std::_Rb_tree.19.101.511.536" }
+%"class.std::_Rb_tree.19.101.511.536" = type { %"struct.std::_Rb_tree<std::pair<const char *, const char *>, std::pair<const std::pair<const char *, const char *>, int>, std::_Select1st<std::pair<const std::pair<const char *, const char *>, int>>, std::less<std::pair<const char *, const char *>>, std::allocator<std::pair<const std::pair<const char *, const char *>, int>> >::_Rb_tree_impl.18.100.510.535" }
+%"struct.std::_Rb_tree<std::pair<const char *, const char *>, std::pair<const std::pair<const char *, const char *>, int>, std::_Select1st<std::pair<const std::pair<const char *, const char *>, int>>, std::less<std::pair<const char *, const char *>>, std::allocator<std::pair<const std::pair<const char *, const char *>, int>> >::_Rb_tree_impl.18.100.510.535" = type { %"struct.std::less.16.98.508.533", %"struct.std::_Rb_tree_node_base.17.99.509.534", i64 }
+%"struct.std::less.16.98.508.533" = type { i8 }
+%"struct.std::_Rb_tree_node_base.17.99.509.534" = type { i32, %"struct.std::_Rb_tree_node_base.17.99.509.534"*, %"struct.std::_Rb_tree_node_base.17.99.509.534"*, %"struct.std::_Rb_tree_node_base.17.99.509.534"* }
+
+define void @test1(%class.spell_checker.21.103.513.538* %this) unnamed_addr align 2 {
+entry:
+ %_M_header.i.i.i.i.i.i = getelementptr inbounds %class.spell_checker.21.103.513.538* %this, i64 0, i32 0, i32 0, i32 0, i32 1
+ %0 = bitcast %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i to i8*
+ call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 4, i1 false) nounwind
+ store %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i, %"struct.std::_Rb_tree_node_base.17.99.509.534"** undef, align 8, !tbaa !0
+ unreachable
+}
+
+; CHECK: @test1
+; CHECK: stwu
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/stwux.ll b/test/CodeGen/PowerPC/stwux.ll
new file mode 100644
index 000000000000..737e9d9f0ecb
--- /dev/null
+++ b/test/CodeGen/PowerPC/stwux.ll
@@ -0,0 +1,47 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+@multvec_i = external unnamed_addr global [100 x i32], align 4
+
+define fastcc void @subs_STMultiExceptIntern() nounwind {
+entry:
+ br i1 undef, label %while.body.lr.ph, label %return
+
+while.body.lr.ph: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %if.end12, %while.body.lr.ph
+ %i.0240 = phi i32 [ -1, %while.body.lr.ph ], [ %i.1, %if.end12 ]
+ br i1 undef, label %if.end12, label %if.then
+
+if.then: ; preds = %while.body
+ br label %if.end12
+
+if.end12: ; preds = %if.then, %while.body
+ %i.1 = phi i32 [ %i.0240, %while.body ], [ undef, %if.then ]
+ br i1 undef, label %while.body, label %while.end
+
+while.end: ; preds = %if.end12
+ br i1 undef, label %return, label %if.end15
+
+if.end15: ; preds = %while.end
+ %idxprom.i.i230 = sext i32 %i.1 to i64
+ %arrayidx18 = getelementptr inbounds [100 x i32]* @multvec_i, i64 0, i64 %idxprom.i.i230
+ store i32 0, i32* %arrayidx18, align 4
+ br i1 undef, label %while.body21, label %while.end90
+
+while.body21: ; preds = %if.end15
+ unreachable
+
+while.end90: ; preds = %if.end15
+ store i32 0, i32* %arrayidx18, align 4
+ br label %return
+
+return: ; preds = %while.end90, %while.end, %entry
+ ret void
+
+; CHECK: @subs_STMultiExceptIntern
+; CHECK: stwux
+}
+
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
new file mode 100644
index 000000000000..713893bf5862
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -0,0 +1,16 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+@a = thread_local global i32 0, align 4
+
+;CHECK: localexec:
+define i32 @localexec() nounwind {
+entry:
+;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;CHECK-NEXT: li [[REG2:[0-9]+]], 42
+;CHECK-NEXT: addi [[REG1]], [[REG1]], a@tprel@l
+;CHECK-NEXT: stw [[REG2]], 0([[REG1]])
+ store i32 42, i32* @a, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index 91b201146b6d..3ea46f50e0c0 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | grep {__trampoline_setup}
+; RUN: llc < %s -march=ppc32 | grep "__trampoline_setup"
module asm "\09.lazy_reference .objc_class_name_NSImageRep"
module asm "\09.objc_class_name_NSBitmapImageRep=0"
diff --git a/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
index 015c08605fea..7e58ec0bdef4 100644
--- a/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
+++ b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec -disable-ppc-ilp-pref | FileCheck %s
; Formerly this did byte loads and word stores.
@a = external global <16 x i8>
@b = external global <16 x i8>
diff --git a/test/CodeGen/SPARC/2012-05-01-LowerArguments.ll b/test/CodeGen/SPARC/2012-05-01-LowerArguments.ll
new file mode 100644
index 000000000000..a607f109762f
--- /dev/null
+++ b/test/CodeGen/SPARC/2012-05-01-LowerArguments.ll
@@ -0,0 +1,13 @@
+; Just check that this doesn't crash:
+; RUN: llc < %s
+; PR2960
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
+target triple = "sparc-unknown-linux-gnu"
+ %"5tango4core9Exception11IOException" = type { [5 x i8*]*, i8*, { i64, i8* }, { i64, i8* }, i64, %"6Object7Monitor"*, %"5tango4core9Exception11IOException"* }
+ %"6Object7Monitor" = type { [3 x i8*]*, i8* }
+
+define fastcc %"5tango4core9Exception11IOException"* @_D5tango4core9Exception13TextException5_ctorMFAaZC5tango4core9Exception13TextException(%"5tango4core9Exception11IOException"* %this, { i64, i8* } %msg) {
+entry_tango.core.Exception.TextException.this:
+ unreachable
+}
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index f06ccd0df1aa..38cea4ca6e4f 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -1,14 +1,11 @@
; Test to make sure that the 'private' is used correctly.
;
-; RUN: llc < %s -march=sparc > %t
-; RUN: grep .foo: %t
-; RUN: grep call.*\.foo %t
-; RUN: grep .baz: %t
-; RUN: grep ld.*\.baz %t
+; RUN: llc < %s -march=sparc | FileCheck %s
define private void @foo() {
ret void
}
+; CHECK: [[FOO:\..*foo]]:
@baz = private global i32 4
@@ -17,3 +14,8 @@ define i32 @bar() {
%1 = load i32* @baz, align 4
ret i32 %1
}
+
+; CHECK: call [[FOO]]
+; CHECK: ld {{.+}}[[BAZ:\..*baz]]
+
+; CHECK: [[BAZ]]
diff --git a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
new file mode 100644
index 000000000000..a4c05d2492a4
--- /dev/null
+++ b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=thumbv6-apple-ios -mcpu=cortex-m0 < %s | FileCheck %s
+; Cortex-M0 doesn't have 32-bit Thumb2 instructions (except for dmb, mrs, etc.)
+; rdar://11331541
+
+define i32 @t(i32 %a) nounwind {
+; CHECK: t:
+; CHECK: asrs [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], #31
+; CHECK: eors [[REG1]], [[REG2]]
+ %tmp0 = ashr i32 %a, 31
+ %tmp1 = xor i32 %tmp0, %a
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll
index f73f93d919af..18e11baf444c 100644
--- a/test/CodeGen/Thumb/asmprinter-bug.ll
+++ b/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep {#0}
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep "#0"
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.__sFILEX = type opaque
diff --git a/test/CodeGen/Thumb/frame_thumb.ll b/test/CodeGen/Thumb/frame_thumb.ll
index 0cac7554be03..6cc4dd12f6a7 100644
--- a/test/CodeGen/Thumb/frame_thumb.ll
+++ b/test/CodeGen/Thumb/frame_thumb.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=thumb-apple-darwin \
-; RUN: -disable-fp-elim | not grep {r11}
+; RUN: -disable-fp-elim | not grep "r11"
; RUN: llc < %s -mtriple=thumb-linux-gnueabi \
-; RUN: -disable-fp-elim | not grep {r11}
+; RUN: -disable-fp-elim | not grep "r11"
define i32 @f() {
entry:
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
index d03b5b2e3bef..2e77660c45c1 100644
--- a/test/CodeGen/Thumb/iabs.ll
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -stats |& \
-; RUN: grep {4 .*Number of machine instrs printed}
+; RUN: llc < %s -march=thumb -stats 2>&1 | \
+; RUN: grep "4 .*Number of machine instrs printed"
;; Integer absolute value, should produce something as good as:
;; Thumb:
diff --git a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
index af7d716446b9..348e9d3f20a7 100644
--- a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
+++ b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
@@ -1,4 +1,4 @@
-; RUN: llc -relocation-model=pic < %s | grep {:$} | sort | uniq -d | count 0
+; RUN: llc -relocation-model=pic < %s | grep ":$" | sort | uniq -d | count 0
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
index 19d23851da8a..255b709edb73 100644
--- a/test/CodeGen/Thumb2/constant-islands.ll
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
-; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -O2 -filetype=obj -verify-machineinstrs -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -verify-machineinstrs -o %t.o
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios"
diff --git a/test/CodeGen/Thumb2/inflate-regs.ll b/test/CodeGen/Thumb2/inflate-regs.ll
new file mode 100644
index 000000000000..d8a558c97e27
--- /dev/null
+++ b/test/CodeGen/Thumb2/inflate-regs.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target triple = "thumbv7-apple-ios"
+
+; CHECK: local_split
+;
+; The load must go into d0-15 which are all clobbered by the asm.
+; RAGreedy should split the range and use d16-d31 to avoid a spill.
+;
+; CHECK: vldr s
+; CHECK-NOT: vstr
+; CHECK: vadd.f32
+; CHECK-NOT: vstr
+; CHECK: vorr
+; CHECK: vstr s
+define void @local_split(float* nocapture %p) nounwind ssp {
+entry:
+ %x = load float* %p, align 4
+ %a = fadd float %x, 1.0
+ tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+ store float %a, float* %p, align 4
+ ret void
+}
+
+; CHECK: global_split
+;
+; Same thing, but across basic blocks.
+;
+; CHECK: vldr s
+; CHECK-NOT: vstr
+; CHECK: vadd.f32
+; CHECK-NOT: vstr
+; CHECK: vorr
+; CHECK: vstr s
+define void @global_split(float* nocapture %p1, float* nocapture %p2) nounwind ssp {
+entry:
+ %0 = load float* %p1, align 4
+ %add = fadd float %0, 1.000000e+00
+ tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+ %cmp = fcmp ogt float %add, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store float %add, float* %p2, align 4
+ br label %if.end
+
+if.end:
+ store float %add, float* %p1, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/inlineasm.ll b/test/CodeGen/Thumb2/inlineasm.ll
new file mode 100644
index 000000000000..30f28f8f0d5b
--- /dev/null
+++ b/test/CodeGen/Thumb2/inlineasm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -filetype=obj
+
+target triple = "thumbv7-none--eabi"
+
+define void @t1() nounwind {
+entry:
+ call void asm sideeffect "mov r0, r1", ""() nounwind
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
index aef6f8560641..61c477aa9180 100644
--- a/test/CodeGen/Thumb2/large-call.ll
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -3,17 +3,18 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
target triple = "thumbv7-apple-ios0.0.0"
; This test case would clobber the outgoing call arguments by writing to the
-; emergency spill slot at [sp, #4] without adjusting the stack pointer first.
+; emergency spill slots at [sp, #4] or [sp, #8] without adjusting the stack
+; pointer first.
; CHECK: main
; CHECK: vmov.f64
; Adjust SP for the large call
; CHECK: sub sp,
-; CHECK: mov [[FR:r[0-9]+]], sp
-; Store to call frame + #4
-; CHECK: str{{.*\[}}[[FR]], #4]
+; Store to call frame + #8
+; CHECK: vstr{{.*\[}}sp, #8]
; Don't clobber that store until the call.
; CHECK-NOT: [sp, #4]
+; CHECK-NOT: [sp, #8]
; CHECK: variadic
define i32 @main() ssp {
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index df221b945e2a..67b07e63fc09 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests could be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
define i1 @f1(i32 %a, i32 %b) {
%nb = sub i32 0, %b
@@ -9,7 +9,7 @@ define i1 @f1(i32 %a, i32 %b) {
ret i1 %tmp
}
; CHECK: f1:
-; CHECK: cmn.w r0, r1
+; CHECK: cmn {{.*}}, r1
define i1 @f2(i32 %a, i32 %b) {
%nb = sub i32 0, %b
@@ -17,7 +17,7 @@ define i1 @f2(i32 %a, i32 %b) {
ret i1 %tmp
}
; CHECK: f2:
-; CHECK: cmn.w r0, r1
+; CHECK: cmn {{.*}}, r1
define i1 @f3(i32 %a, i32 %b) {
%nb = sub i32 0, %b
@@ -25,7 +25,7 @@ define i1 @f3(i32 %a, i32 %b) {
ret i1 %tmp
}
; CHECK: f3:
-; CHECK: cmn.w r0, r1
+; CHECK: cmn {{.*}}, r1
define i1 @f4(i32 %a, i32 %b) {
%nb = sub i32 0, %b
@@ -33,7 +33,7 @@ define i1 @f4(i32 %a, i32 %b) {
ret i1 %tmp
}
; CHECK: f4:
-; CHECK: cmn.w r0, r1
+; CHECK: cmn {{.*}}, r1
define i1 @f5(i32 %a, i32 %b) {
%tmp = shl i32 %b, 5
@@ -42,7 +42,7 @@ define i1 @f5(i32 %a, i32 %b) {
ret i1 %tmp1
}
; CHECK: f5:
-; CHECK: cmn.w r0, r1, lsl #5
+; CHECK: cmn.w {{.*}}, r1, lsl #5
define i1 @f6(i32 %a, i32 %b) {
%tmp = lshr i32 %b, 6
@@ -51,7 +51,7 @@ define i1 @f6(i32 %a, i32 %b) {
ret i1 %tmp1
}
; CHECK: f6:
-; CHECK: cmn.w r0, r1, lsr #6
+; CHECK: cmn.w {{.*}}, r1, lsr #6
define i1 @f7(i32 %a, i32 %b) {
%tmp = ashr i32 %b, 7
@@ -60,7 +60,7 @@ define i1 @f7(i32 %a, i32 %b) {
ret i1 %tmp1
}
; CHECK: f7:
-; CHECK: cmn.w r0, r1, asr #7
+; CHECK: cmn.w {{.*}}, r1, asr #7
define i1 @f8(i32 %a, i32 %b) {
%l8 = shl i32 %a, 24
@@ -71,5 +71,15 @@ define i1 @f8(i32 %a, i32 %b) {
ret i1 %tmp1
}
; CHECK: f8:
-; CHECK: cmn.w r0, r0, ror #8
+; CHECK: cmn.w {{.*}}, {{.*}}, ror #8
+
+define void @f9(i32 %a, i32 %b) nounwind optsize {
+ tail call void asm sideeffect "cmn.w r0, r1", ""() nounwind, !srcloc !0
+ ret void
+}
+
+!0 = metadata !{i32 81}
+
+; CHECK: f9:
+; CHECK: cmn.w r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index da121140be20..4ce7acc22e0f 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
; 0x000000bb = 187
define i1 @f1(i32 %a) {
; CHECK: f1:
-; CHECK: cmp r0, #187
+; CHECK: cmp {{.*}}, #187
%tmp = icmp ne i32 %a, 187
ret i1 %tmp
}
@@ -14,7 +14,7 @@ define i1 @f1(i32 %a) {
; 0x00aa00aa = 11141290
define i1 @f2(i32 %a) {
; CHECK: f2:
-; CHECK: cmp.w r0, #11141290
+; CHECK: cmp.w {{.*}}, #11141290
%tmp = icmp eq i32 %a, 11141290
ret i1 %tmp
}
@@ -22,7 +22,7 @@ define i1 @f2(i32 %a) {
; 0xcc00cc00 = 3422604288
define i1 @f3(i32 %a) {
; CHECK: f3:
-; CHECK: cmp.w r0, #-872363008
+; CHECK: cmp.w {{.*}}, #-872363008
%tmp = icmp ne i32 %a, 3422604288
ret i1 %tmp
}
@@ -30,7 +30,7 @@ define i1 @f3(i32 %a) {
; 0xdddddddd = 3722304989
define i1 @f4(i32 %a) {
; CHECK: f4:
-; CHECK: cmp.w r0, #-572662307
+; CHECK: cmp.w {{.*}}, #-572662307
%tmp = icmp ne i32 %a, 3722304989
ret i1 %tmp
}
@@ -38,7 +38,7 @@ define i1 @f4(i32 %a) {
; 0x00110000 = 1114112
define i1 @f5(i32 %a) {
; CHECK: f5:
-; CHECK: cmp.w r0, #1114112
+; CHECK: cmp.w {{.*}}, #1114112
%tmp = icmp eq i32 %a, 1114112
ret i1 %tmp
}
@@ -46,7 +46,7 @@ define i1 @f5(i32 %a) {
; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform.
;
; CHECK: f6:
-; CHECK-NOT: cmp.w r0, #-2147483648
+; CHECK-NOT: cmp.w {{.*}}, #-2147483648
; CHECK: bx lr
define i32 @f6(i32 %a) {
%tmp = icmp sgt i32 %a, 2147483647
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 15052e006710..f6790deb1fc2 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,25 +1,25 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
define i1 @f1(i32 %a, i32 %b) {
; CHECK: f1:
-; CHECK: cmp r0, r1
+; CHECK: cmp {{.*}}, r1
%tmp = icmp ne i32 %a, %b
ret i1 %tmp
}
define i1 @f2(i32 %a, i32 %b) {
; CHECK: f2:
-; CHECK: cmp r0, r1
+; CHECK: cmp {{.*}}, r1
%tmp = icmp eq i32 %a, %b
ret i1 %tmp
}
define i1 @f6(i32 %a, i32 %b) {
; CHECK: f6:
-; CHECK: cmp.w r0, r1, lsl #5
+; CHECK: cmp.w {{.*}}, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = icmp eq i32 %tmp, %a
ret i1 %tmp1
@@ -27,7 +27,7 @@ define i1 @f6(i32 %a, i32 %b) {
define i1 @f7(i32 %a, i32 %b) {
; CHECK: f7:
-; CHECK: cmp.w r0, r1, lsr #6
+; CHECK: cmp.w {{.*}}, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = icmp ne i32 %tmp, %a
ret i1 %tmp1
@@ -35,7 +35,7 @@ define i1 @f7(i32 %a, i32 %b) {
define i1 @f8(i32 %a, i32 %b) {
; CHECK: f8:
-; CHECK: cmp.w r0, r1, asr #7
+; CHECK: cmp.w {{.*}}, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = icmp eq i32 %a, %tmp
ret i1 %tmp1
@@ -43,7 +43,7 @@ define i1 @f8(i32 %a, i32 %b) {
define i1 @f9(i32 %a, i32 %b) {
; CHECK: f9:
-; CHECK: cmp.w r0, r0, ror #8
+; CHECK: cmp.w {{.*}}, {{.*}}, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
index 7e1655f6c252..0748b9b32d9a 100644
--- a/test/CodeGen/Thumb2/thumb2-jtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -1,9 +1,15 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | not grep tbb
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | FileCheck %s
; Do not use tbb / tbh if any destination is before the jumptable.
; rdar://7102917
define i16 @main__getopt_internal_2E_exit_2E_ce(i32, i1 %b) nounwind {
+; CHECK: main__getopt_internal_2E_exit_2E_ce
+; CHECK-NOT: tbb
+; CHECK-NOT: tbh
+; 32-bit jump tables use explicit branches, not data regions, so make sure
+; we don't annotate this region.
+; CHECK-NOT: data_region
entry:
br i1 %b, label %codeRepl127.exitStub, label %newFuncRoot
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index d1af4ba47fe0..2178eecb43e4 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep {ldr.*\\\[.*\],} | count 1
+; RUN: grep "ldr.*\[.*\]," | count 1
define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index 9cc3f4a2eda5..601c0b560800 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep {ldr.*\\!} | count 3
+; RUN: grep "ldr.*\!" | count 3
; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep {ldrsb.*\\!} | count 1
+; RUN: grep "ldrsb.*\!" | count 1
define i32* @test1(i32* %X, i32* %dest) {
%Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-rev16.ll b/test/CodeGen/Thumb2/thumb2-rev16.ll
index 39b6ac3f0027..10cd5391a48d 100644
--- a/test/CodeGen/Thumb2/thumb2-rev16.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -1,7 +1,7 @@
; XFAIL: *
; fixme rev16 pattern is not matching
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rev16\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1
; 0xff00ff00 = 4278255360
; 0x00ff00ff = 16711935
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 590c333b3d1a..5ad92cd7290d 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-
+; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=THUMB1
; CHECK: f1:
; CHECK: ror.w r0, r0, #22
@@ -13,6 +13,8 @@ define i32 @f1(i32 %a) {
; CHECK: f2:
; CHECK-NOT: and
; CHECK: ror
+; THUMB1: f2
+; THUMB1: and
define i32 @f2(i32 %v, i32 %nbits) {
entry:
%and = and i32 %nbits, 31
@@ -21,4 +23,4 @@ entry:
%shl = shl i32 %v, %sub
%or = or i32 %shl, %shr
ret i32 %or
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
index 5dc3cc3ce70a..a9d71d6bda15 100644
--- a/test/CodeGen/Thumb2/thumb2-tbb.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -5,7 +5,9 @@ define void @bar(i32 %n.u) {
entry:
; CHECK: bar:
; CHECK: tbb
-; CHECK: .align 1
+; CHECK: .data_region jt8
+; CHECK: .end_data_region
+; CHECK-NEXT: .align 1
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
bb:
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index 00c928fc0782..d453f469abc7 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; test as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; test as 'mov.w r0, #0'.
; 0x000000bb = 187
define i1 @f2(i32 %a) {
@@ -10,7 +10,7 @@ define i1 @f2(i32 %a) {
ret i1 %tmp1
}
; CHECK: f2:
-; CHECK: teq.w r0, #187
+; CHECK: teq.w {{.*}}, #187
; 0x00aa00aa = 11141290
define i1 @f3(i32 %a) {
@@ -19,7 +19,7 @@ define i1 @f3(i32 %a) {
ret i1 %tmp1
}
; CHECK: f3:
-; CHECK: teq.w r0, #11141290
+; CHECK: teq.w {{.*}}, #11141290
; 0xcc00cc00 = 3422604288
define i1 @f6(i32 %a) {
@@ -28,7 +28,7 @@ define i1 @f6(i32 %a) {
ret i1 %tmp1
}
; CHECK: f6:
-; CHECK: teq.w r0, #-872363008
+; CHECK: teq.w {{.*}}, #-872363008
; 0xdddddddd = 3722304989
define i1 @f7(i32 %a) {
@@ -37,7 +37,7 @@ define i1 @f7(i32 %a) {
ret i1 %tmp1
}
; CHECK: f7:
-; CHECK: teq.w r0, #-572662307
+; CHECK: teq.w {{.*}}, #-572662307
; 0xdddddddd = 3722304989
define i1 @f8(i32 %a) {
@@ -53,5 +53,5 @@ define i1 @f10(i32 %a) {
ret i1 %tmp1
}
; CHECK: f10:
-; CHECK: teq.w r0, #1114112
+; CHECK: teq.w {{.*}}, #1114112
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index 8acae9090f17..27ecad839399 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'.
define i1 @f2(i32 %a, i32 %b) {
; CHECK: f2
-; CHECK: teq.w r0, r1
+; CHECK: teq.w {{.*}}, r1
%tmp = xor i32 %a, %b
%tmp1 = icmp eq i32 %tmp, 0
ret i1 %tmp1
@@ -13,7 +13,7 @@ define i1 @f2(i32 %a, i32 %b) {
define i1 @f4(i32 %a, i32 %b) {
; CHECK: f4
-; CHECK: teq.w r0, r1
+; CHECK: teq.w {{.*}}, r1
%tmp = xor i32 %a, %b
%tmp1 = icmp eq i32 0, %tmp
ret i1 %tmp1
@@ -21,7 +21,7 @@ define i1 @f4(i32 %a, i32 %b) {
define i1 @f6(i32 %a, i32 %b) {
; CHECK: f6
-; CHECK: teq.w r0, r1, lsl #5
+; CHECK: teq.w {{.*}}, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = xor i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -30,7 +30,7 @@ define i1 @f6(i32 %a, i32 %b) {
define i1 @f7(i32 %a, i32 %b) {
; CHECK: f7
-; CHECK: teq.w r0, r1, lsr #6
+; CHECK: teq.w {{.*}}, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = xor i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -39,7 +39,7 @@ define i1 @f7(i32 %a, i32 %b) {
define i1 @f8(i32 %a, i32 %b) {
; CHECK: f8
-; CHECK: teq.w r0, r1, asr #7
+; CHECK: teq.w {{.*}}, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = xor i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -48,7 +48,7 @@ define i1 @f8(i32 %a, i32 %b) {
define i1 @f9(i32 %a, i32 %b) {
; CHECK: f9
-; CHECK: teq.w r0, r0, ror #8
+; CHECK: teq.w {{.*}}, {{.*}}, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 43e208cc59dc..67fe82ee5202 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'.
; 0x000000bb = 187
define i1 @f2(i32 %a) {
@@ -10,7 +10,7 @@ define i1 @f2(i32 %a) {
ret i1 %tmp1
}
; CHECK: f2:
-; CHECK: tst.w r0, #187
+; CHECK: tst.w {{.*}}, #187
; 0x00aa00aa = 11141290
define i1 @f3(i32 %a) {
@@ -19,7 +19,7 @@ define i1 @f3(i32 %a) {
ret i1 %tmp1
}
; CHECK: f3:
-; CHECK: tst.w r0, #11141290
+; CHECK: tst.w {{.*}}, #11141290
; 0xcc00cc00 = 3422604288
define i1 @f6(i32 %a) {
@@ -28,7 +28,7 @@ define i1 @f6(i32 %a) {
ret i1 %tmp1
}
; CHECK: f6:
-; CHECK: tst.w r0, #-872363008
+; CHECK: tst.w {{.*}}, #-872363008
; 0xdddddddd = 3722304989
define i1 @f7(i32 %a) {
@@ -37,7 +37,7 @@ define i1 @f7(i32 %a) {
ret i1 %tmp1
}
; CHECK: f7:
-; CHECK: tst.w r0, #-572662307
+; CHECK: tst.w {{.*}}, #-572662307
; 0x00110000 = 1114112
define i1 @f10(i32 %a) {
@@ -46,4 +46,4 @@ define i1 @f10(i32 %a) {
ret i1 %tmp1
}
; CHECK: f10:
-; CHECK: tst.w r0, #1114112
+; CHECK: tst.w {{.*}}, #1114112
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index bfe016fc8d6f..e3fe792005f8 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -join-physregs | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
-; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
+; These tests would be improved by 'movs r0, #0' being rematerialized below the
+; tst as 'mov.w r0, #0'.
define i1 @f2(i32 %a, i32 %b) {
; CHECK: f2:
-; CHECK: tst r0, r1
+; CHECK: tst {{.*}}, r1
%tmp = and i32 %a, %b
%tmp1 = icmp eq i32 %tmp, 0
ret i1 %tmp1
@@ -13,7 +13,7 @@ define i1 @f2(i32 %a, i32 %b) {
define i1 @f4(i32 %a, i32 %b) {
; CHECK: f4:
-; CHECK: tst r0, r1
+; CHECK: tst {{.*}}, r1
%tmp = and i32 %a, %b
%tmp1 = icmp eq i32 0, %tmp
ret i1 %tmp1
@@ -21,7 +21,7 @@ define i1 @f4(i32 %a, i32 %b) {
define i1 @f6(i32 %a, i32 %b) {
; CHECK: f6:
-; CHECK: tst.w r0, r1, lsl #5
+; CHECK: tst.w {{.*}}, r1, lsl #5
%tmp = shl i32 %b, 5
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -30,7 +30,7 @@ define i1 @f6(i32 %a, i32 %b) {
define i1 @f7(i32 %a, i32 %b) {
; CHECK: f7:
-; CHECK: tst.w r0, r1, lsr #6
+; CHECK: tst.w {{.*}}, r1, lsr #6
%tmp = lshr i32 %b, 6
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -39,7 +39,7 @@ define i1 @f7(i32 %a, i32 %b) {
define i1 @f8(i32 %a, i32 %b) {
; CHECK: f8:
-; CHECK: tst.w r0, r1, asr #7
+; CHECK: tst.w {{.*}}, r1, asr #7
%tmp = ashr i32 %b, 7
%tmp1 = and i32 %a, %tmp
%tmp2 = icmp eq i32 %tmp1, 0
@@ -48,7 +48,7 @@ define i1 @f8(i32 %a, i32 %b) {
define i1 @f9(i32 %a, i32 %b) {
; CHECK: f9:
-; CHECK: tst.w r0, r0, ror #8
+; CHECK: tst.w {{.*}}, {{.*}}, ror #8
%l8 = shl i32 %a, 24
%r8 = lshr i32 %a, 8
%tmp = or i32 %l8, %r8
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 03189aa7e15f..61e849ef4a43 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,15 +1,22 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s --check-prefix=M3
+; rdar://11318438
define zeroext i8 @test1(i32 %A.u) {
-; CHECK: test1
-; CHECK: uxtb r0, r0
+; A8: test1
+; A8: uxtb r0, r0
%B.u = trunc i32 %A.u to i8
ret i8 %B.u
}
define zeroext i32 @test2(i32 %A.u, i32 %B.u) {
-; CHECK: test2
-; CHECK: uxtab r0, r0, r1
+; A8: test2
+; A8: uxtab r0, r0, r1
+
+; M3: test2
+; M3: uxtb r1, r1
+; M3-NOT: uxtab
+; M3: add r0, r1
%C.u = trunc i32 %B.u to i8
%D.u = zext i8 %C.u to i32
%E.u = add i32 %A.u, %D.u
@@ -17,8 +24,8 @@ define zeroext i32 @test2(i32 %A.u, i32 %B.u) {
}
define zeroext i32 @test3(i32 %A.u) {
-; CHECK: test3
-; CHECK: uxth.w r0, r0, ror #8
+; A8: test3
+; A8: uxth.w r0, r0, ror #8
%B.u = lshr i32 %A.u, 8
%C.u = shl i32 %A.u, 24
%D.u = or i32 %B.u, %C.u
diff --git a/test/CodeGen/Thumb2/tls1.ll b/test/CodeGen/Thumb2/tls1.ll
index 1e555571c054..d91e3b32f9b7 100644
--- a/test/CodeGen/Thumb2/tls1.ll
+++ b/test/CodeGen/Thumb2/tls1.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
-; RUN: grep {i(tpoff)}
+; RUN: grep "i(tpoff)"
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
-; RUN: grep {__aeabi_read_tp}
+; RUN: grep "__aeabi_read_tp"
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \
-; RUN: -relocation-model=pic | grep {__tls_get_addr}
+; RUN: -relocation-model=pic | grep "__tls_get_addr"
@i = thread_local global i32 15 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 24848602baf8..0af2445d7fba 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -3,7 +3,7 @@
; it makes a ton of annoying overlapping live ranges. This code should not
; cause spills!
;
-; RUN: llc < %s -march=x86 -stats |& not grep spilled
+; RUN: llc < %s -march=x86 -stats 2>&1 | not grep spilled
target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
index 8b0a18550da1..f201b981a872 100644
--- a/test/CodeGen/X86/2003-11-03-GlobalBool.ll
+++ b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
@@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 | \
-; RUN: not grep {.byte\[\[:space:\]\]*true}
+; RUN: not grep ".byte[[:space:]]*true"
@X = global i1 true ; <i1*> [#uses=0]
diff --git a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
index fea2b54d7630..dde210b776af 100644
--- a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
+++ b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | grep {(%esp}
-; RUN: llc < %s -march=x86 | grep {pushl %ebp} | count 1
-; RUN: llc < %s -march=x86 | grep {popl %ebp} | count 1
+; RUN: llc < %s -march=x86 | grep "(%esp"
+; RUN: llc < %s -march=x86 | grep "pushl %ebp" | count 1
+; RUN: llc < %s -march=x86 | grep "popl %ebp" | count 1
declare i8* @llvm.returnaddress(i32)
diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll
index c44d10ac5b5a..e22aa6a09398 100644
--- a/test/CodeGen/X86/2004-03-30-Select-Max.ll
+++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {j\[lgbe\]}
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; CHECK-NOT: {{j[lgbe]}}
define i32 @max(i32 %A, i32 %B) nounwind {
%gt = icmp sgt i32 %A, %B ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
index dc69ef83103f..f8bf0991fb14 100644
--- a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 | not grep "subl.*%esp"
define i32 @f(i32 %a, i32 %b) {
%tmp.2 = mul i32 %a, %a ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
index 0421896922b9..1a3d74918d1a 100644
--- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats 2>&1 | \
; RUN: grep asm-printer | grep 7
define i32 @g(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index 8783a11c060b..fb1262a37295 100644
--- a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
-; RUN: grep {movl _last} %t | count 1
-; RUN: grep {cmpl.*_last} %t | count 1
+; RUN: grep "movl _last" %t | count 1
+; RUN: grep "cmpl.*_last" %t | count 1
@block = external global i8* ; <i8**> [#uses=1]
@last = external global i32 ; <i32*> [#uses=3]
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index b0453299669e..5cba3efeefb8 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \
-; RUN: not grep {Number of register spills}
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \
+; RUN: not grep "Number of register spills"
; END.
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 7d0a6ab0a04c..1c75f93915a7 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \
+; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
; RUN: grep asm-printer | grep 14
;
@size20 = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 23954d76a5d6..95eefa1e7196 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& \
+; RUN: llc < %s -march=x86 -stats 2>&1 | \
; RUN: grep asm-printer | grep 13
define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind {
diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll
index d58d638562c9..3419d01fa083 100644
--- a/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp}
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep "subl.*%esp"
@A = external global i16* ; <i16**> [#uses=1]
@B = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 38bca283b132..37c510786a5e 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN: grep {asm-printer} | grep 35
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \
+; RUN: grep "asm-printer" | grep 35
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
index 3159cec8553e..c5c74d104863 100644
--- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -1,7 +1,7 @@
; PR850
; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
-; RUN: grep {movl 4(%eax),%ebp} %t
-; RUN: grep {movl 0(%eax), %ebx} %t
+; RUN: grep "movl 4(%eax),%ebp" %t
+; RUN: grep "movl 0(%eax), %ebx" %t
define i32 @foo(i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i) {
%tmp9.i.i = call i32 asm sideeffect "push %ebp\0Apush %ebx\0Amovl 4($2),%ebp\0Amovl 0($2), %ebx\0Amovl $1,%eax\0Aint $$0x80\0Apop %ebx\0Apop %ebp", "={ax},i,0,{cx},{dx},{si},{di}"( i32 192, i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i ) ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
index a19d8f7092c3..56d5f2f3040a 100644
--- a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
+++ b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mcpu=i386 | \
-; RUN: not grep {movl %eax, %edx}
+; RUN: not grep "movl %eax, %edx"
define i32 @foo(i32 %t, i32 %C) {
entry:
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 6ec9a488494a..a58c9b102d13 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -52,8 +52,8 @@ entry:
%tmp21 = load double* %tmp20 ; <double> [#uses=1]
%tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
%tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0]
- br label %return
-return: ; preds = %entry
+ br label %finish
+finish:
%retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1]
ret i32 %retval.upgrd.8
}
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index affb7afb1c51..783d9f94caeb 100644
--- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep movb %t | count 2
-; RUN: grep {movzb\[wl\]} %t
+; RUN: grep movb %t | count 1
+; RUN: grep "movzb[wl]" %t
define void @handle_vector_size_attribute() nounwind {
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index a2288986362e..04d4b8ee57eb 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep {,%rsp)} %t
+; RUN: not grep ",%rsp)" %t
; PR1103
target datalayout = "e-p:64:64"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
index 3312e01b3d8e..3b2e443d7d4e 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax}
+; RUN: llc < %s -march=x86 | grep "mov %gs:72, %eax"
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
index c1b1ad1c730d..18b06dc0857c 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mcpu=yonah -march=x86 | \
-; RUN: grep {cmpltsd %xmm0, %xmm0}
+; RUN: grep "cmpltsd %xmm0, %xmm0"
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
diff --git a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
index 85a2ecc959ab..cae68c9f3a1b 100644
--- a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
+++ b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | not grep {bsrl.*10}
+; RUN: llc < %s | not grep "bsrl.*10"
; PR1356
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
index deb39998a3ab..c3d7e8a05472 100644
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep {addl .12, %esp}
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep "addl .12, %esp"
; PR1398
%struct.S = type { i32, i32 }
diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
index 77291f063b79..aa0ee5d07462 100644
--- a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
+++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {movsbl}
+; RUN: llc < %s -march=x86 | grep "movsbl"
@X = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
index 5acb05134c7c..e81534b0110b 100644
--- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
+++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | FileCheck %s
+; CHECK-NOT: lea R
%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index 228a915e3e5a..56a109acfc79 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static | grep {foo str$}
+; RUN: llc < %s -relocation-model=static | grep "foo str$"
; PR1761
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-pc-linux"
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 2e95082afa9c..99df20da2510 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 -mcpu=generic | grep "(%esp)" | count 2
; PR1872
%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 266fd7b91325..39af9319c8d1 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -10,10 +10,10 @@
%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
-define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind {
+define i32 @_bfd_stab_section_find_nearest_line(i32 %offset, i1 %cond) nounwind {
entry:
%tmp910 = add i32 0, %offset ; <i32> [#uses=1]
- br i1 true, label %bb951, label %bb917
+ br i1 %cond, label %bb951, label %bb917
bb917: ; preds = %entry
ret i32 0
@@ -21,7 +21,7 @@ bb917: ; preds = %entry
bb951: ; preds = %bb986, %entry
%tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3]
%tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1]
- br i1 true, label %bb986, label %bb967
+ br i1 %cond, label %bb986, label %bb967
bb967: ; preds = %bb951
ret i32 0
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 0091397ca6b0..9584b718fea0 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast
+; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast -optimize-regalloc=0
define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
entry:
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index bdacf5071128..a1b973d7ccfa 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index 5115e48365fc..a52b36588a36 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s | grep {a:} | not grep ax
-; RUN: llc < %s | grep {b:} | not grep ax
+; RUN: llc < %s | grep "a:" | not grep ax
+; RUN: llc < %s | grep "b:" | not grep ax
; PR2078
; The clobber list says that "ax" is clobbered. Make sure that eax isn't
; allocated to the input/output register.
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index da029079c6ff..9185a3671184 100644
--- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -regalloc=fast -march=x86 -mattr=+mmx | grep esi
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -march=x86 -mattr=+mmx | grep esi
; PR2082
; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
; registers.
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
index 4dc3a10f4647..5ca7e3ed3dbf 100644
--- a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
+++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
+; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep "#" | not grep -v "##"
%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
%struct.AGenericManager = type <{ i8 }>
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index 109069e35365..3a1de11ea21b 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep {, %e}
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep ", %e"
%struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 }
%struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* }
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index 859041eb81ff..f244793e7a95 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e}
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep "%e"
; Make sure xorl operands are 32-bit registers.
%struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
index 5b97eb71cbfd..7c04206de72f 100644
--- a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t
-; RUN: not grep {r\[abcd\]x} %t
-; RUN: not grep {r\[ds\]i} %t
-; RUN: not grep {r\[bs\]p} %t
+; RUN: not grep "r[abcd]x" %t
+; RUN: not grep "r[ds]i" %t
+; RUN: not grep "r[bs]p" %t
%struct.BITMAP = type { i16, i16, i32, i32, i32, i32, i32, i32, i8*, i8* }
%struct.BltData = type { float, float, float, float }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index c068f8ac632c..4e73b5aa1cdb 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast -optimize-regalloc=0
@_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*] ; <[5 x i32 (...)*]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll
index 99cb8569b3f4..bdac8fd48422 100644
--- a/test/CodeGen/X86/2008-08-06-CmpStride.ll
+++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
+; RUN: llc -march=x86-64 < %s -o - | grep "cmpl \$[1], %"
@.str = internal constant [4 x i8] c"%d\0A\00"
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index 1d27fc53ea5e..c63c890add50 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,15 +1,36 @@
; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %ebp | count 9
-; RUN: llc < %s | grep %ecx | count 5
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i386-pc-linux"
-define i8* @test(i32 %a, i8* %b) {
+; CHECK: test1
+; CHECK: pushl %ebp
+define i8* @test1(i32 %a, i8* %b) {
entry:
call void @llvm.eh.unwind.init()
%foo = alloca i32
call void @llvm.eh.return.i32(i32 %a, i8* %b)
+; CHECK: movl 12(%ebp), %[[ECX:e..]]
+; CHECK: movl 8(%ebp), %[[EAX:e..]]
+; CHECK: movl %[[ECX]], 4(%ebp,%[[EAX]])
+; CHECK: leal 4(%ebp,%[[EAX]]), %[[ECX2:e..]]
+; CHECK: movl %[[ECX2]], %esp
+; CHECK: ret
+ unreachable
+}
+
+; CHECK: test2
+; CHECK: pushl %ebp
+define i8* @test2(i32 %a, i8* %b) {
+entry:
+ call void @llvm.eh.return.i32(i32 %a, i8* %b)
+; CHECK: movl 12(%ebp), %[[ECX:e..]]
+; CHECK: movl 8(%ebp), %[[EAX:e..]]
+; CHECK: movl %[[ECX]], 4(%ebp,%[[EAX]])
+; CHECK: leal 4(%ebp,%[[EAX]]), %[[ECX2:e..]]
+; CHECK: movl %[[ECX2]], %esp
+; CHECK: ret
unreachable
}
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 86e50c98bfdb..4b2774b64b7b 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 | FileCheck %s
-; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=fast -optimize-regalloc=0 | FileCheck %s
; %0 must not be put in EAX or EDX.
; In the first asm, $0 and $2 must not be put in EAX.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 6867ae798087..5c2fbeee5c70 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s
+; RUN: llc < %s -march=x86 -regalloc=fast -optimize-regalloc=0 | FileCheck %s
; RUN: llc < %s -march=x86 -regalloc=basic | FileCheck %s
; RUN: llc < %s -march=x86 -regalloc=greedy | FileCheck %s
diff --git a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
index 421b931ecd5a..e504bc3e776c 100644
--- a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
+++ b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep "ucomiss[^,]*esp"
define void @f(float %wt) {
entry:
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index 9d144a4be0e9..b2cf34cd2033 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s
; Now this test spills one register. But a reload in the loop is cheaper than
; the divsd so it's a win.
@@ -17,8 +17,7 @@ bb: ; preds = %bb, %entry
; CHECK: %bb30.loopexit
; CHECK: divsd %xmm0
; CHECK: movsd %xmm0, 16(%esp)
-; CHECK: .align
-; CHECK-NEXT: %bb3
+; CHECK: %bb3
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
%2 = load i32* null, align 4 ; <i32> [#uses=1]
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll
index 2edcaea80ce7..0e95c9e34e1c 100644
--- a/test/CodeGen/X86/2008-12-23-crazy-address.ll
+++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
+; RUN: llc < %s -march=x86 -relocation-model=static | grep "lea.*X.*esp" | count 2
@X = external global [0 x i32]
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 3e425536d1b9..b478f27a95b9 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {mov.*56}
+; RUN: llc < %s -march=x86 | grep "mov.*56"
; PR3449
define void @test(<8 x double>* %P, i64* %Q) nounwind {
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 7ea699833ba8..9cbf35094061 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted
; rdar://6608609
define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 0b5b7bdd94d7..d50fe6f73a00 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {8 machine-licm}
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
; rdar://6627786
; rdar://7792037
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
index 3564f01a7c43..847a43fb06a1 100644
--- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep ".space"
; rdar://6668548
declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 8bbdb0e82f78..d934ec9a88f8 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t
; RUN: not grep spill %t
-; RUN: not grep {%rsp} %t
-; RUN: not grep {%rbp} %t
+; RUN: not grep "%rsp" %t
+; RUN: not grep "%rbp" %t
; The register-pressure scheduler should be able to schedule this in a
; way that does not require spills.
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index f46eed4769f7..ad18a0c5b94d 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded"
; XFAIL: *
; 69408 removed the opportunity for this optimization to work
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 9f5a8c53be18..5cb05e8a796f 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
+; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
; RUN: FileCheck %s
; rdar://6808032
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index d6ed0c42230d..08bf9e3f9f36 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2
-; RUN: grep {leaq.*TLSGD} %t2
-; RUN: grep {__tls_get_addr} %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -optimize-regalloc=0 -relocation-model=pic > %t2
+; RUN: grep "leaq.*TLSGD" %t2
+; RUN: grep "__tls_get_addr" %t2
; PR4004
@i = thread_local global i32 15
diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index a2fd2e4c51c9..a6ed74ba2ee9 100644
--- a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)}
+; RUN: llc < %s | grep "movl.*%ebx, 8(%esi)"
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.0"
diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll
index af552d4ce20d..fe04272082c9 100644
--- a/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | not grep {movzbl %\[abcd\]h,}
+; RUN: llc < %s -march=x86-64 | not grep "movzbl %[abcd]h,"
define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind {
newFuncRoot:
diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll
index de930d512678..b8effa677355 100644
--- a/test/CodeGen/X86/20090313-signext.ll
+++ b/test/CodeGen/X86/20090313-signext.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t
-; RUN: grep {movswl %ax, %edi} %t
-; RUN: grep {movw (%rax), %ax} %t
+; RUN: grep "movswl %ax, %edi" %t
+; RUN: grep "movw (%rax), %ax" %t
; XFAIL: *
@x = common global i16 0
diff --git a/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
index cd8960b9ed7b..eb4a5c04a2ae 100644
--- a/test/CodeGen/X86/2010-01-19-OptExtBug.ll
+++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats |& not grep ext-opt
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt
define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
index 90eb84d1dc40..35f233952df0 100644
--- a/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
+++ b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc -regalloc=fast %s -o %t
+; RUN: llc -regalloc=fast -optimize-regalloc=0 %s -o %t
; PR7066
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/CodeGen/X86/2010-05-12-FastAllocKills.ll b/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
index 36a99d6f90e7..eb0b150378d6 100644
--- a/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
+++ b/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
@@ -1,4 +1,4 @@
-; RUN: llc -regalloc=fast -verify-machineinstrs < %s
+; RUN: llc -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin"
diff --git a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
index 4639866afc5e..9b47bb75bf16 100644
--- a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
+++ b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc -regalloc=fast < %s | FileCheck %s
+; RUN: llc -regalloc=fast -optimize-regalloc=0 < %s | FileCheck %s
; PR7382
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
index c6f4b497af10..be10ad5cc206 100644
--- a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
+++ b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
@@ -12,9 +12,9 @@ declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalData
; Avoid hoisting the test above loads or copies
; CHECK: %entry
-; CHECK: cmpq
+; CHECK: test
; CHECK-NOT: mov
-; CHECK: jb
+; CHECK: je
define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp {
entry:
%0 = load i8** null, align 8
diff --git a/test/CodeGen/X86/2011-04-19-sclr-bb.ll b/test/CodeGen/X86/2011-04-19-sclr-bb.ll
new file mode 100644
index 000000000000..771e4b3a0815
--- /dev/null
+++ b/test/CodeGen/X86/2011-04-19-sclr-bb.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Make sure that values of illegal types are not scalarized between basic blocks.
+;CHECK: test
+;CHECK-NOT: pinsrw
+;CHECK-NOT: pextrb
+;CHECK: ret
+define void @test(i1 %cond) { ; loop that carries two <4 x i1> values across basic blocks
+ENTRY:
+ br label %LOOP
+LOOP:
+ %vec1 = phi <4 x i1> [ %vec1_or_2, %LOOP ], [ zeroinitializer, %ENTRY ] ; zero on entry, OR result on the back edge
+ %vec2 = phi <4 x i1> [ %vec2_and_1, %LOOP ], [ zeroinitializer, %ENTRY ] ; zero on entry, AND result on the back edge
+ %vec1_or_2 = or <4 x i1> %vec1, %vec2
+ %vec2_and_1 = and <4 x i1> %vec2, %vec1
+ br i1 %cond, label %LOOP, label %EXIT ; keep looping while %cond is true
+
+EXIT:
+ ret void
+}
+
diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll
index bf7f583aab73..ce63c74fbdfd 100644
--- a/test/CodeGen/X86/2011-06-03-x87chain.ll
+++ b/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse | FileCheck %s
define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
entry:
diff --git a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
index a51dad03039e..47ef693cc25e 100644
--- a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
+++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats |& FileCheck %s
+; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s
;
; This test should not cause any spilling with RAFast.
;
diff --git a/test/CodeGen/X86/2011-09-18-sse2cmp.ll b/test/CodeGen/X86/2011-09-18-sse2cmp.ll
index 844d674fc9e5..a6f428fdacc3 100644
--- a/test/CodeGen/X86/2011-09-18-sse2cmp.ll
+++ b/test/CodeGen/X86/2011-09-18-sse2cmp.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s
+;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
;CHECK: @max
;CHECK: cmplepd
diff --git a/test/CodeGen/X86/2011-09-21-setcc-bug.ll b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
index ed5649c60265..4daf6781495a 100644
--- a/test/CodeGen/X86/2011-09-21-setcc-bug.ll
+++ b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -promote-elements -mattr=+sse41
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41
; Make sure we are not crashing on this code.
diff --git a/test/CodeGen/X86/2011-10-11-srl.ll b/test/CodeGen/X86/2011-10-11-srl.ll
index cf9d36f1c48c..6c6d340fd1a4 100644
--- a/test/CodeGen/X86/2011-10-11-srl.ll
+++ b/test/CodeGen/X86/2011-10-11-srl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=-sse41
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse41
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
index 6f9188c44268..dc3a08bb4daf 100644
--- a/test/CodeGen/X86/2011-12-15-vec_shift.ll
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
-; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
+; RUN: llc -march=x86-64 -mattr=+sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
; Test case for r146671
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7"
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
index 557d49d82f84..477b4deba820 100644
--- a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=core2 -mattr=+sse | FileCheck %s
; PR11940: Do not optimize away movb %al, %ch
%struct.APInt = type { i64* }
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
index 101eccabbd49..18a331377353 100644
--- a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
-; RUN: not grep {Number of machine instructions hoisted out of loops post regalloc}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \
+; RUN: not grep "Number of machine instructions hoisted out of loops post regalloc"
; rdar://11095580
diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll
index 9543587747a6..9a66b670c7af 100644
--- a/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -5,7 +5,8 @@
; It's hard to test for the ISEL condition because CodeGen optimizes
; away the bugpointed code. Just ensure the basics are still there.
;CHECK: func:
-;CHECK: vmovups
+;CHECK: vpxor
+;CHECK: vinsertf128
;CHECK: vpshufd
;CHECK: vpshufd
;CHECK: vmulps
diff --git a/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll b/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll
new file mode 100644
index 000000000000..171c3f18dc8b
--- /dev/null
+++ b/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=source | FileCheck %s
+
+; Teach two-address pass to update the "source" map so it doesn't perform a
+; non-profitable commute using outdated info. The test case would still fail
+; because of poor pre-RA schedule. That will be fixed by MI scheduler.
+; rdar://11472010
+define i32 @t(i32 %mask) nounwind readnone ssp { ; returns ((%mask - 65535) >> 23) + (%mask & 1)
+entry:
+; CHECK: t:
+; CHECK-NOT: mov
+ %sub = add i32 %mask, -65535 ; first use of %mask
+ %shr = lshr i32 %sub, 23 ; logical (unsigned) shift
+ %and = and i32 %mask, 1 ; second use of %mask: its low bit
+ %add = add i32 %shr, %and
+ ret i32 %add
+}
diff --git a/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll b/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll
new file mode 100644
index 000000000000..837fbc0777f7
--- /dev/null
+++ b/test/CodeGen/X86/2012-05-19-CoalescerCrash.ll
@@ -0,0 +1,122 @@
+; RUN: llc < %s -verify-coalescing
+; PR12892
+;
+; Dead code elimination during coalescing causes a live range to split into two
+; virtual registers. Stale identity copies that had already been joined were
+; interfering with the liveness computations.
+
+target triple = "i386-pc-linux-gnu"
+
+define void @_ZN4llvm17AsmMatcherEmitter3runERNS_11raw_ostreamE() align 2 { ; reduced reproducer: a chain of invokes with landing pads
+ invoke void @_ZNK4llvm13CodeGenTarget12getAsmParserEv()
+ to label %1 unwind label %5
+
+; <label>:1 ; preds = %0
+ invoke void @_ZNK4llvm6Record16getValueAsStringENS_9StringRefE()
+ to label %4 unwind label %2
+
+; <label>:2 ; preds = %1
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ unreachable
+
+; <label>:4 ; preds = %1
+ invoke void @_ZN4llvm18isCurrentDebugTypeEPKc()
+ to label %12 unwind label %7
+
+; <label>:5 ; preds = %0
+ %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br label %33
+
+; <label>:7 ; preds = %4
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br label %9
+
+; <label>:9 ; preds = %28, %7
+ %10 = phi { i8*, i32 } [ %29, %28 ], [ %8, %7 ] ; merges the rebuilt aggregate from %28 with the landingpad value from %7
+ %11 = extractvalue { i8*, i32 } %10, 1 ; i32 field of the merged value; consumed by the phi in block %33
+ invoke fastcc void @_ZN12_GLOBAL__N_114AsmMatcherInfoD2Ev()
+ to label %32 unwind label %35
+
+; <label>:12 ; preds = %4
+ invoke void @_ZNK4llvm13CodeGenTarget10getRegBankEv()
+ to label %13 unwind label %16
+
+; <label>:13 ; preds = %12
+ br label %14
+
+; <label>:14 ; preds = %20, %13
+ %15 = icmp eq i32 undef, 0 ; condition on undef: either successor may be taken
+ br i1 %15, label %20, label %18
+
+; <label>:16 ; preds = %12
+ %17 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br label %26
+
+; <label>:18 ; preds = %14
+ invoke void @_ZNSs4_Rep9_S_createEjjRKSaIcE()
+ to label %19 unwind label %21
+
+; <label>:19 ; preds = %18
+ unreachable
+
+; <label>:20 ; preds = %14
+ br label %14
+
+; <label>:21 ; preds = %18
+ %22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %23 = extractvalue { i8*, i32 } %22, 1 ; i32 field of this landingpad; reaches block %26 along two edges
+ br i1 undef, label %26, label %24
+
+; <label>:24 ; preds = %21
+ br i1 undef, label %25, label %26
+
+; <label>:25 ; preds = %24
+ unreachable
+
+; <label>:26 ; preds = %24, %21, %16
+ %27 = phi i32 [ 0, %16 ], [ %23, %21 ], [ %23, %24 ] ; same value %23 arrives from both %21 and %24
+ invoke void @_ZNSt6vectorISt4pairISsSsESaIS1_EED1Ev()
+ to label %28 unwind label %30
+
+; <label>:28 ; preds = %26
+ %29 = insertvalue { i8*, i32 } undef, i32 %27, 1 ; rebuild the aggregate with only the i32 field defined
+ br label %9
+
+; <label>:30 ; preds = %26
+ %31 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ unreachable
+
+; <label>:32 ; preds = %9
+ br label %33
+
+; <label>:33 ; preds = %32, %5
+ %34 = phi i32 [ undef, %5 ], [ %11, %32 ] ; result is never used; the block ends in unreachable
+ unreachable
+
+; <label>:35 ; preds = %9
+ %36 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ unreachable
+}
+
+declare void @_ZNK4llvm13CodeGenTarget12getAsmParserEv()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZNK4llvm6Record16getValueAsStringENS_9StringRefE()
+
+declare void @_ZN4llvm18isCurrentDebugTypeEPKc()
+
+declare fastcc void @_ZN12_GLOBAL__N_114AsmMatcherInfoD2Ev() unnamed_addr inlinehint align 2
+
+declare hidden void @_ZNSt6vectorISt4pairISsSsESaIS1_EED1Ev() unnamed_addr align 2
+
+declare void @_ZNSs4_Rep9_S_createEjjRKSaIcE()
+
+declare void @_ZNK4llvm13CodeGenTarget10getRegBankEv()
diff --git a/test/CodeGen/X86/2012-05-19-avx2-store.ll b/test/CodeGen/X86/2012-05-19-avx2-store.ll
new file mode 100644
index 000000000000..1c1e8e2f0a21
--- /dev/null
+++ b/test/CodeGen/X86/2012-05-19-avx2-store.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
+
+define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp { ; joins two 128-bit vectors and stores the 256-bit result
+entry:
+ ; CHECK: vmovaps
+ ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
+ ; CHECK: vmovups
+ %A = load <4 x i32>* %Ap
+ %B = load <4 x i32>* %Bp
+ %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask: %A in lanes 0-3, %B in lanes 4-7
+ store <8 x i32> %Z, <8 x i32>* %P, align 16 ; 32-byte value stored with only 16-byte alignment
+ ret void
+}
diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll
new file mode 100644
index 000000000000..906b748fa420
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: load_store
+define void @load_store(<4 x i16>* %in) { ; load, double and store back a 64-bit <4 x i16> vector
+entry:
+; CHECK: movsd
+ %A27 = load <4 x i16>* %in, align 4 ; 64-bit vector load
+ %A28 = add <4 x i16> %A27, %A27 ; each lane added to itself
+; CHECK: movlpd
+ store <4 x i16> %A28, <4 x i16>* %in, align 4 ; 64-bit vector store to the same address
+ ret void
+; CHECK: ret
+}
+
+; Make sure that we store a 64bit value, even on 32bit systems.
+;CHECK: store_64
+define void @store_64(<2 x i32>* %ptr) { ; stores an all-zero <2 x i32> (64 bits) to %ptr
+BB:
+ store <2 x i32> zeroinitializer, <2 x i32>* %ptr
+ ret void
+;CHECK: movlpd
+;CHECK: ret
+}
+
+;CHECK: load_64
+define <2 x i32> @load_64(<2 x i32>* %ptr) { ; loads a <2 x i32> (64 bits) from %ptr and returns it
+BB:
+ %t = load <2 x i32>* %ptr
+ ret <2 x i32> %t
+;CHECK: movsd
+;CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-07-10-shufnorm.ll b/test/CodeGen/X86/2012-07-10-shufnorm.ll
new file mode 100644
index 000000000000..e39df58877f6
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-10-shufnorm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx | FileCheck %s
+
+; CHECK: ocl
+define void @ocl() { ; builds an <8 x double> out of zero and undef lanes, then stores it
+entry:
+ %vext = shufflevector <2 x double> zeroinitializer, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; widen 2 zero lanes to 8; lanes 2-7 undef
+ %vecinit = shufflevector <8 x double> %vext, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %vecinit1 = insertelement <8 x double> %vecinit, double undef, i32 2
+ %vecinit3 = insertelement <8 x double> %vecinit1, double undef, i32 3
+ %vecinit5 = insertelement <8 x double> %vecinit3, double 0.000000e+00, i32 4 ; lane 4 set to 0.0
+ %vecinit9 = shufflevector <8 x double> %vecinit5, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 9, i32 10> ; indices 8-10 select from the undef second operand
+ store <8 x double> %vecinit9, <8 x double>* undef ; destination pointer is undef
+ ret void
+; CHECK: vxorps
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
new file mode 100644
index 000000000000..3b7a8a7b871c
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s
+
+declare x86_fastcallcc i64 @barrier()
+
+;CHECK: bcast_fold
+;CHECK: vmovaps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
+;CHECK: barrier
+;CHECK: vbroadcastss [[SPILLED]], %ymm0
+;CHECK: ret
+define <8 x float> @bcast_fold( float* %A) { ; scalar loaded before a call is splatted to 8 lanes after it
+BB:
+ %A0 = load float* %A ; defined before the call, used only in %work
+ %tt3 = call x86_fastcallcc i64 @barrier() ; result unused
+ br i1 undef, label %work, label %exit
+
+work:
+ %A1 = insertelement <8 x float> undef, float %A0, i32 0
+ %A2 = shufflevector <8 x float> %A1, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ; all-zero mask: splat lane 0
+ ret <8 x float> %A2
+
+exit:
+ ret <8 x float> undef
+}
diff --git a/test/CodeGen/X86/2012-07-15-tconst_shl.ll b/test/CodeGen/X86/2012-07-15-tconst_shl.ll
new file mode 100644
index 000000000000..46eca7644ebb
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-tconst_shl.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+avx2
+; make sure that we are not crashing.
+
+define <16 x i32> @autogen_SD34717() { ; vector lshr whose shift amount is a shuffle of constant zero vectors
+BB:
+ %Shuff7 = shufflevector <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> <i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 undef, i32 22, i32 24, i32 26, i32 28, i32 30, i32 undef> ; both inputs are zero; two mask lanes are undef
+ %B9 = lshr <16 x i32> zeroinitializer, %Shuff7
+ ret <16 x i32> %B9
+}
diff --git a/test/CodeGen/X86/2012-07-15-vshl.ll b/test/CodeGen/X86/2012-07-15-vshl.ll
new file mode 100644
index 000000000000..cd0fef469e6a
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-vshl.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx
+; PR13352
+
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+define void @f_f() nounwind { ; loop computing a blend mask from a vector shift by a mostly-undef amount
+allocas:
+ br label %for_loop29
+
+for_loop29: ; preds = %safe_if_after_true, %allocas
+ %indvars.iv596 = phi i64 [ %indvars.iv.next597, %safe_if_after_true ], [ 0, %allocas ] ; counter: 0 on entry, +1 per iteration
+ %0 = trunc i64 %indvars.iv596 to i32
+ %smear.15 = insertelement <16 x i32> undef, i32 %0, i32 15 ; only lane 15 is defined; lanes 0-14 stay undef
+ %bitop = lshr <16 x i32> zeroinitializer, %smear.15 ; per-lane shift amounts taken from %smear.15
+ %bitop35 = and <16 x i32> %bitop, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %bitop35_to_bool = icmp ne <16 x i32> %bitop35, zeroinitializer
+ %val_to_boolvec32 = sext <16 x i1> %bitop35_to_bool to <16 x i32> ; true lanes become all-ones
+ %floatmask.i526 = bitcast <16 x i32> %val_to_boolvec32 to <16 x float>
+ %mask1.i529 = shufflevector <16 x float> %floatmask.i526, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; upper half (lanes 8-15) of the mask
+ %"internal_mask&function_mask41_any" = icmp eq i32 undef, 0
+ br i1 %"internal_mask&function_mask41_any", label %safe_if_after_true, label %safe_if_run_true
+
+safe_if_after_true: ; preds = %for_loop29
+ %indvars.iv.next597 = add i64 %indvars.iv596, 1
+ br label %for_loop29
+
+safe_if_run_true: ; preds = %for_loop29
+ %blend1.i583 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> undef, <8 x float> undef, <8 x float> %mask1.i529) nounwind ; only the mask operand is defined
+ unreachable
+}
+
diff --git a/test/CodeGen/X86/2012-07-16-LeaUndef.ll b/test/CodeGen/X86/2012-07-16-LeaUndef.ll
new file mode 100644
index 000000000000..9e5cbd2f3373
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-16-LeaUndef.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+define void @autogen_SD2543() { ; auto-generated test with no checks on the output; add with an undef operand feeds a loop
+A:
+ %E83 = add i32 0, 1
+ %E820 = add i32 0, undef ; one operand is undef
+ br label %C
+C:
+ %B908 = add i32 %E83, %E820 ; sum of a constant expression and the undef-based value
+ store i32 %B908, i32* undef ; destination pointer is undef
+ %Sl2391 = select i1 undef, i32 undef, i32 %E83
+ %Cmp3114 = icmp ne i32 %Sl2391, undef
+ br i1 %Cmp3114, label %C, label %G ; block %C loops back to itself
+G:
+ ret void
+}
diff --git a/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll b/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll
new file mode 100644
index 000000000000..17533a1e1649
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+define void @autogen_SD3100() { ; auto-generated test with no checks on the output; fptoui of a float constant to i1
+BB:
+ %FC123 = fptoui float 0x40693F5D00000000 to i1 ; float-to-unsigned conversion with a 1-bit result type
+ br i1 %FC123, label %V, label %W ; converted bit used directly as the branch condition
+
+V:
+ ret void
+W:
+ ret void
+}
diff --git a/test/CodeGen/X86/2012-07-17-vtrunc.ll b/test/CodeGen/X86/2012-07-17-vtrunc.ll
new file mode 100644
index 000000000000..2de2f97d7d2d
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-17-vtrunc.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+define void @autogen_SD33189483() { ; auto-generated test with no checks on the output; trunc of <8 x i64> to <8 x i1>
+BB:
+ br label %CF76
+
+CF76: ; preds = %CF76, %BB
+ %Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> undef, <4 x i32> zeroinitializer ; all-zero mask: splat lane 0
+ %Tr16 = trunc <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i1> ; constant all-ones vector truncated to 1-bit lanes
+ %E19 = extractelement <8 x i1> %Tr16, i32 2
+ br i1 %E19, label %CF76, label %CF78
+
+CF78: ; preds = %CF78, %CF76
+ %BC = bitcast <4 x i64> %Shuff13 to <4 x double>
+ br label %CF78 ; this block only branches to itself; the function has no ret
+}
diff --git a/test/CodeGen/X86/2012-07-23-select_cc.ll b/test/CodeGen/X86/2012-07-23-select_cc.ll
new file mode 100644
index 000000000000..33fcb120e162
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-23-select_cc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; PR 13428
+
+declare void @use(double)
+
+define void @test() {
+entry:
+ call void @use(double 1.000000e+00)
+ %A = icmp eq i64 undef, 2
+ %B = zext i1 %A to i32
+ %C = sitofp i32 %B to double
+ call void @use(double %C)
+ call void @use(double 0.000000e+00)
+ unreachable
+}
diff --git a/test/CodeGen/X86/2012-08-07-CmpISelBug.ll b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll
new file mode 100644
index 000000000000..000b853ab8f6
--- /dev/null
+++ b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+
+; Cmp lowering should not look past the truncate unless the high bits are known
+; zero.
+; rdar://12027825
+
+define void @foo(i8 %arg4, i32 %arg5, i32* %arg14) nounwind {
+bb:
+; CHECK: foo:
+; CHECK-NOT: testl
+; CHECK: testb
+ %tmp48 = zext i8 %arg4 to i32
+ %tmp49 = and i32 %tmp48, 32
+ %tmp50 = add i32 %tmp49, 1593371643
+ %tmp55 = sub i32 %tmp50, 0
+ %tmp56 = add i32 %tmp55, 7787538
+ %tmp57 = xor i32 %tmp56, 1601159181
+ %tmp58 = xor i32 %arg5, 1601159181
+ %tmp59 = and i32 %tmp57, %tmp58
+ %tmp60 = add i32 %tmp59, -1263900958
+ %tmp67 = sub i32 %tmp60, 0
+ %tmp103 = xor i32 %tmp56, 13
+ %tmp104 = trunc i32 %tmp103 to i8
+ %tmp105 = sub i8 0, %tmp104
+ %tmp106 = add i8 %tmp105, -103
+ %tmp113 = sub i8 %tmp106, 0
+ %tmp114 = add i8 %tmp113, -72
+ %tmp141 = icmp ne i32 %tmp67, -1263900958
+ %tmp142 = select i1 %tmp141, i8 %tmp114, i8 undef
+ %tmp143 = xor i8 %tmp142, 81
+ %tmp144 = zext i8 %tmp143 to i32
+ %tmp145 = add i32 %tmp144, 2062143348
+ %tmp152 = sub i32 %tmp145, 0
+ store i32 %tmp152, i32* %arg14
+ ret void
+}
diff --git a/test/CodeGen/X86/4char-promote.ll b/test/CodeGen/X86/4char-promote.ll
index 386057f0a3b6..4f1a859fd436 100644
--- a/test/CodeGen/X86/4char-promote.ll
+++ b/test/CodeGen/X86/4char-promote.ll
@@ -1,11 +1,12 @@
; A test for checking PR 9623
-;RUN: llc -march=x86-64 -mcpu=corei7 -promote-elements < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
target triple = "x86_64-apple-darwin"
-; CHECK: pmulld
-; CHECK: paddd
-; CHECK: movdqa
+; CHECK: pmulld
+; CHECK: paddd
+; CHECK-NOT: movdqa
+; CHECK: ret
define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y) {
entry:
diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll
index 3758fd8ce500..33141680aa92 100644
--- a/test/CodeGen/X86/MachineSink-PHIUse.ll
+++ b/test/CodeGen/X86/MachineSink-PHIUse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats |& grep {machine-sink}
+; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink"
define fastcc void @t() nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 8e871f4aeb4d..03d2e472cba6 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,8 +1,6 @@
; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
-
-; Some of these tests depend on -join-physregs to commute instructions.
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
; The immediate can be encoded in a smaller way if the
; instruction is a sub instead of an add.
@@ -101,9 +99,9 @@ define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind {
}
; X64: test7:
-; X64: addl %e[[A1]], %eax
+; X64: addl %e[[A1]], %e
; X64-NEXT: setb %dl
-; X64-NEXT: ret
+; X64: ret
; PR5443
define {i64, i1} @test8(i64 %left, i64 %right) nounwind {
diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll
index 49abd8a92e64..15fbec52e243 100644
--- a/test/CodeGen/X86/addr-label-difference.ll
+++ b/test/CodeGen/X86/addr-label-difference.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - | grep {__TEXT,__const}
+; RUN: llc %s -o - | grep "__TEXT,__const"
; PR5929
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin10.0"
diff --git a/test/CodeGen/X86/aligned-comm.ll b/test/CodeGen/X86/aligned-comm.ll
index 7715869ed99c..eab02cc1f9d5 100644
--- a/test/CodeGen/X86/aligned-comm.ll
+++ b/test/CodeGen/X86/aligned-comm.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7}
-; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep "array,16512,7"
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep "array,16512,7"
; Darwin 9+ should get alignment on common symbols.
@array = common global [4128 x i32] zeroinitializer, align 128
diff --git a/test/CodeGen/X86/alignment-2.ll b/test/CodeGen/X86/alignment-2.ll
index cc709b52d934..1f9e85cbb763 100644
--- a/test/CodeGen/X86/alignment-2.ll
+++ b/test/CodeGen/X86/alignment-2.ll
@@ -18,7 +18,9 @@
define signext i8 @do_lo_list() nounwind optsize ssp {
bb:
; CHECK: do_lo_list
-; CHECK-NOT: movaps
+; Make sure we do not use movaps for the global variable.
+; It is okay to use movaps for writing the local variable on stack.
+; CHECK-NOT: movaps {{[0-9]*}}(%{{[a-z]*}}), {{%xmm[0-9]}}
%myopt = alloca %struct.printQueryOpt, align 4
%tmp = bitcast %struct.printQueryOpt* %myopt to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false)
diff --git a/test/CodeGen/X86/alloca-align-rounding-32.ll b/test/CodeGen/X86/alloca-align-rounding-32.ll
index c0f1a18123e6..a45284e10cf4 100644
--- a/test/CodeGen/X86/alloca-align-rounding-32.ll
+++ b/test/CodeGen/X86/alloca-align-rounding-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
declare void @bar(<2 x i64>* %n)
@@ -6,10 +6,15 @@ define void @foo(i32 %h) {
%p = alloca <2 x i64>, i32 %h
call void @bar(<2 x i64>* %p)
ret void
+; CHECK: foo
+; CHECK-NOT: andl $-32, %eax
}
define void @foo2(i32 %h) {
%p = alloca <2 x i64>, i32 %h, align 32
call void @bar(<2 x i64>* %p)
ret void
+; CHECK: foo2
+; CHECK: andl $-32, %esp
+; CHECK: andl $-32, %eax
}
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 3c87dbf2bd78..3d76fb0aa25b 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
declare void @bar(<2 x i64>* %n)
@@ -6,10 +6,15 @@ define void @foo(i64 %h) {
%p = alloca <2 x i64>, i64 %h
call void @bar(<2 x i64>* %p)
ret void
+; CHECK: foo
+; CHECK-NOT: andq $-32, %rax
}
define void @foo2(i64 %h) {
%p = alloca <2 x i64>, i64 %h, align 32
call void @bar(<2 x i64>* %p)
ret void
+; CHECK: foo2
+; CHECK: andq $-32, %rsp
+; CHECK: andq $-32, %rax
}
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
index a3dc85ff5ce5..640237d0b504 100644
--- a/test/CodeGen/X86/andimm8.ll
+++ b/test/CodeGen/X86/andimm8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding -join-physregs | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
; PR8365
; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0]
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/asm-reg-type-mismatch.ll
index f0d46a0252c3..47accdbc07b3 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/asm-reg-type-mismatch.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mcpu=core2 | grep xorps | count 2
-; RUN: llc < %s -mcpu=core2 | not grep movap
+; RUN: llc < %s -mcpu=core2 | FileCheck %s
; PR2715
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
@@ -11,8 +10,22 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.nsXPTCVariant = type { %struct.nsXPTCMiniVariant, i8*, %struct.nsXPTType, i8 }
%struct.nsXPTType = type { %struct.XPTTypeDescriptorPrefix }
-define i32 @XPTC_InvokeByIndex(%struct.nsISupports* %that, i32 %methodIndex, i32 %paramCount, %struct.nsXPTCVariant* %params) nounwind {
+define i32 @test1(%struct.nsISupports* %that, i32 %methodIndex, i32 %paramCount, %struct.nsXPTCVariant* %params) nounwind {
entry:
call void asm sideeffect "", "{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},~{dirflag},~{fpsr},~{flags}"( double undef, double undef, double undef, double 1.0, double undef, double 0.0, double undef, double 0.0 ) nounwind
ret i32 0
+ ; CHECK: test1
+ ; CHECK-NOT: movap
+ ; CHECK: xorps
+ ; CHECK: xorps
+ ; CHECK-NOT: movap
+}
+
+define i64 @test2() nounwind {
+entry:
+ %0 = tail call i64 asm sideeffect "movq $1, $0", "={xmm7},*m,~{dirflag},~{fpsr},~{flags}"(i64* null) nounwind
+ ret i64 %0
+ ; CHECK: test2
+ ; CHECK: movq {{.*}}, %xmm7
+ ; CHECK: movd %xmm7, %rax
}
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
index 59427880a71d..19482e13d8c8 100644
--- a/test/CodeGen/X86/atom-lea-sp.ll
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM %s
; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
declare void @use_arr(i8*)
declare void @many_params(i32, i32, i32, i32, i32, i32)
define void @test1() nounwind {
-; atom: test1:
-; atom: leal -1052(%esp), %esp
-; atom-NOT: sub
-; atom: call
-; atom: leal 1052(%esp), %esp
+; ATOM: test1:
+; ATOM: leal -1052(%esp), %esp
+; ATOM-NOT: sub
+; ATOM: call
+; ATOM: leal 1052(%esp), %esp
; CHECK: test1:
; CHECK: subl
@@ -22,10 +22,10 @@ define void @test1() nounwind {
}
define void @test2() nounwind {
-; atom: test2:
-; atom: leal -28(%esp), %esp
-; atom: call
-; atom: leal 28(%esp), %esp
+; ATOM: test2:
+; ATOM: leal -28(%esp), %esp
+; ATOM: call
+; ATOM: leal 28(%esp), %esp
; CHECK: test2:
; CHECK-NOT: lea
@@ -34,9 +34,9 @@ define void @test2() nounwind {
}
define void @test3() nounwind {
-; atom: test3:
-; atom: leal -8(%esp), %esp
-; atom: leal 8(%esp), %esp
+; ATOM: test3:
+; ATOM: leal -8(%esp), %esp
+; ATOM: leal 8(%esp), %esp
; CHECK: test3:
; CHECK-NOT: lea
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
index 4dd9a9e3481d..0d97e8535824 100644
--- a/test/CodeGen/X86/atom-sched.ll
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -1,9 +1,6 @@
-; XFAIL: *
; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
;
-; FIXME: Atom's scheduler is temporarily disabled.
-; XFAIL: *
@a = common global i32 0, align 4
@b = common global i32 0, align 4
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 7c5abe2095cc..152bece4240f 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 772949173392..188efe26d92a 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; AVX128 tests:
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index b33493252a5f..c44beb4bc2b8 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vaesdec
@@ -1154,7 +1154,7 @@ define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
- ; CHECK: movl
+ ; CHECK: seta
%res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1165,7 +1165,7 @@ define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
- ; CHECK: movl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1176,7 +1176,7 @@ define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
- ; CHECK: movl
+ ; CHECK: seto
%res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1187,7 +1187,7 @@ define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
- ; CHECK: movl
+ ; CHECK: sets
%res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1198,7 +1198,7 @@ define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestri
- ; CHECK: movl
+ ; CHECK: sete
%res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1209,6 +1209,7 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK: movl
; CHECK: movl
; CHECK: vpcmpestrm
+ ; CHECK-NOT: vmov
%res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -1226,7 +1227,7 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read
define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
- ; CHECK: movl
+ ; CHECK: seta
%res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1235,7 +1236,7 @@ declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
- ; CHECK: movl
+ ; CHECK: sbbl
%res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1244,7 +1245,7 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
- ; CHECK: movl
+ ; CHECK: seto
%res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1253,7 +1254,7 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
- ; CHECK: movl
+ ; CHECK: sets
%res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1262,7 +1263,7 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
- ; CHECK: movl
+ ; CHECK: sete
%res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1271,6 +1272,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistrm
+ ; CHECK-NOT: vmov
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -2555,3 +2557,36 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+
+; CHECK: movntdq
+define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
+ %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
+ tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
+ ret void
+}
+declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
+
+; CHECK: movntps
+define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
+ tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
+ ret void
+}
+declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
+
+; CHECK: movntpd
+define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
+ ; The fadd forces the double-precision execution domain so that movntpd (checked above) is selected.
+ %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+ tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
+ ret void
+}
+declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
+
+
+; Check for pclmulqdq
+define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK: vpclmulqdq
+ %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index 7c5882010945..eff92510348a 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
; UNSAFE: maxpd:
; UNSAFE: vmaxpd {{.+}}, %xmm
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
index 5268ec3a56cd..e203c4ed0298 100755
--- a/test/CodeGen/X86/avx-shuffle-x86_32.ll
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -4,5 +4,5 @@ define <4 x i64> @test1(<4 x i64> %a) nounwind {
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i64>%b
; CHECK: test1:
- ; CHECK: vinsertf128
+ ; CHECK-NOT: vinsertf128
}
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 16c447be1727..9b41709a3b1b 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -90,8 +90,8 @@ define i32 @test9(<4 x i32> %a) nounwind {
; Extract a value which is the result of an undef mask.
define i32 @test10(<4 x i32> %a) nounwind {
; CHECK: @test10
-; CHECK-NEXT: #
-; CHECK-NEXT: ret
+; CHECK-NOT: {{^[^#]*[a-z]}}
+; CHECK: ret
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%r = extractelement <8 x i32> %b, i32 2
ret i32 %r
@@ -149,17 +149,26 @@ entry:
}
; PR12413
+; CHECK: shuf1
+; CHECK: vpshufb
+; CHECK: vpshufb
; CHECK: vpshufb
; CHECK: vpshufb
+define <32 x i8> @shuf1(<32 x i8> %inval1, <32 x i8> %inval2) {
+entry:
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
+ ret <32 x i8> %0
+}
+
+; Handle the case where only half of the 256 bits is splittable.
+; CHECK: shuf2
; CHECK: vpshufb
; CHECK: vpshufb
-define <32 x i8> @shuf(<32 x i8> %inval1, <32 x i8> %inval2) {
+; CHECK: vpextrb
+; CHECK: vpextrb
+define <32 x i8> @shuf2(<32 x i8> %inval1, <32 x i8> %inval2) {
entry:
- %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0,
-i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
-22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32
-42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32
-62>
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 31, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
ret <32 x i8> %0
}
@@ -202,3 +211,40 @@ define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
%t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i64> %t
}
+
+; CHECK: narrow
+; CHECK: vpermilps
+; CHECK: ret
+define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline {
+ %t = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 undef, i32 14, i32 15, i32 undef, i32 undef>
+ ret <16 x i16> %t
+}
+
+;CHECK: test17
+;CHECK-NOT: vinsertf128
+;CHECK: ret
+define <8 x float> @test17(<4 x float> %y) {
+ %x = shufflevector <4 x float> %y, <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %x
+}
+
+; CHECK: test18
+; CHECK: vshufps
+; CHECK: vshufps
+; CHECK: vunpcklps
+; CHECK: ret
+define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
+ %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x float>%S
+}
+
+; CHECK: test19
+; CHECK: vshufps
+; CHECK: vshufps
+; CHECK: vunpcklps
+; CHECK: ret
+define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
+ %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x float>%S
+}
+
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 148ae7329f4b..0d403d4bb124 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -112,3 +112,32 @@ entry:
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
ret <2 x double> %vecinit2.i
}
+
+; CHECK: _RR
+; CHECK: vbroadcastss (%
+; CHECK: ret
+define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
+entry:
+ %q = load float* %ptr, align 4
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+ %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+ %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+ ; force a chain
+ %j = load i32* %k, align 4
+ store i32 %j, i32* undef
+ ret <4 x float> %vecinit6.i
+}
+
+
+; CHECK: _RR2
+; CHECK: vbroadcastss (%
+; CHECK: ret
+define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
+entry:
+ %q = load float* %ptr, align 4
+ %v = insertelement <4 x float> undef, float %q, i32 0
+ %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %t
+}
+
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
new file mode 100755
index 000000000000..b47491335a31
--- /dev/null
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: trunc4
+; CHECK: vpermd
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
+ %B = trunc <4 x i64> %A to <4 x i32>
+ ret <4 x i32>%B
+}
+
+; CHECK: trunc8
+; CHECK: vpshufb
+; CHECK-NOT: vinsert
+; CHECK: ret
+
+define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
+ %B = trunc <8 x i32> %A to <8 x i16>
+ ret <8 x i16>%B
+}
+
+; CHECK: sext4
+; CHECK: vpmovsxdq
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <4 x i64> @sext4(<4 x i32> %A) nounwind {
+ %B = sext <4 x i32> %A to <4 x i64>
+ ret <4 x i64>%B
+}
+
+; CHECK: sext8
+; CHECK: vpmovsxwd
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <8 x i32> @sext8(<8 x i16> %A) nounwind {
+ %B = sext <8 x i16> %A to <8 x i32>
+ ret <8 x i32>%B
+}
+
+; CHECK: zext4
+; CHECK: vpmovzxdq
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <4 x i64> @zext4(<4 x i32> %A) nounwind {
+ %B = zext <4 x i32> %A to <4 x i64>
+ ret <4 x i64>%B
+}
+
+; CHECK: zext8
+; CHECK: vpmovzxwd
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <8 x i32> @zext8(<8 x i16> %A) nounwind {
+ %B = zext <8 x i16> %A to <8 x i32>
+ ret <8 x i32>%B
+}
+; CHECK: zext_8i8_8i32
+; CHECK: vpmovzxwd
+; CHECK: vpand
+; CHECK: ret
+define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
+ %B = zext <8 x i8> %A to <8 x i32>
+ ret <8 x i32>%B
+}
+
+
+
+
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 3f27a0291b4f..a6141b095617 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -976,3 +976,182 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
+define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1,
+ <4 x i32> %idx, <2 x double> %mask) {
+ ; CHECK: vgatherdpd
+ %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
+ i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
+ <4 x i32>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x double> %mask) {
+ ; CHECK: vgatherdpd
+ %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
+ <4 x i32>, <4 x double>, i8) nounwind readonly
+
+define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
+ <2 x i64> %idx, <2 x double> %mask) {
+ ; CHECK: vgatherqpd
+ %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
+ i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
+ <2 x i64>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x double> %mask) {
+ ; CHECK: vgatherqpd
+ %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
+ <4 x i64>, <4 x double>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x float> %mask) {
+ ; CHECK: vgatherdps
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+ <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1,
+ <8 x i32> %idx, <8 x float> %mask) {
+ ; CHECK: vgatherdps
+ %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+ i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
+ <8 x i32>, <8 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1,
+ <2 x i64> %idx, <4 x float> %mask) {
+ ; CHECK: vgatherqps
+ %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
+ i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
+ <2 x i64>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x float> %mask) {
+ ; CHECK: vgatherqps
+ %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
+ <4 x i64>, <4 x float>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1,
+ <4 x i32> %idx, <2 x i64> %mask) {
+ ; CHECK: vpgatherdq
+ %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
+ i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
+ <4 x i32>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x i64> %mask) {
+ ; CHECK: vpgatherdq
+ %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
+ <4 x i32>, <4 x i64>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1,
+ <2 x i64> %idx, <2 x i64> %mask) {
+ ; CHECK: vpgatherqq
+ %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
+ i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
+ <2 x i64>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x i64> %mask) {
+ ; CHECK: vpgatherqq
+ %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
+ <4 x i64>, <4 x i64>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1,
+ <4 x i32> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherdd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
+ i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
+ <4 x i32>, <4 x i32>, i8) nounwind readonly
+
+define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1,
+ <8 x i32> %idx, <8 x i32> %mask) {
+ ; CHECK: vpgatherdd
+ %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
+ i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
+ <8 x i32>, <8 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1,
+ <2 x i64> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherqd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
+ i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
+ <2 x i64>, <4 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
+ <4 x i64> %idx, <4 x i32> %mask) {
+ ; CHECK: vpgatherqd
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
+ i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
+ <4 x i64>, <4 x i32>, i8) nounwind readonly
+
+; PR13298
+define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a,
+ <8 x i32> %idx, <8 x float> %mask,
+ float* nocapture %out) {
+; CHECK: test_gather_mask
+; CHECK: vmovdqa %ymm2, [[DEST:%.*]]
+; CHECK: vgatherdps [[DEST]]
+;; gather with mask
+ %a_i8 = bitcast float* %a to i8*
+ %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+ i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
+
+;; For debugging, dump out the mask; this keeps %mask live after the gather, so the gather is expected to use a copy (see CHECK lines above).
+ %out_ptr = bitcast float * %out to <8 x float> *
+ store <8 x float> %mask, <8 x float> * %out_ptr, align 4
+
+ ret <8 x float> %res
+}
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
new file mode 100644
index 000000000000..c5899fa27426
--- /dev/null
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; Make sure that we don't match this shuffle using a single vpblendw YMM instruction.
+; The mask for the vpblendw instruction needs to be identical for both halves
+; of the YMM register, but this shuffle takes positions 1, 2 and 4 of the low half and only position 7 of the high half from %b, so two vpblendw instructions are needed.
+
+; CHECK: blendw1
+; CHECK: vpblendw
+; CHECK: vpblendw
+; CHECK: ret
+define <16 x i16> @blendw1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
+ %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+ ret <16 x i16> %t
+}
+
+; CHECK: vpshufhw $27, %ymm
+define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12> ; reverses the upper four i16 of each 8-element half and leaves the lower four in place
+ ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpshuflw $27, %ymm
+define <16 x i16> @vpshuflw(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15> ; reverses the lower four i16 of each 8-element half (one lane is undef) and leaves the upper four in place
+ ret <16 x i16> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 1a78414761ca..b804233663d4 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -160,6 +160,15 @@ entry:
ret <8 x i32> %g
}
+; CHECK: V113
+; CHECK: vbroadcastss
+; CHECK: ret
+define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
+entry:
+ %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000> ; all eight lanes hold the same constant, so the operand is expected to be produced with a broadcast
+ ret <8 x float> %g
+}
+
; CHECK: _e2
; CHECK: vbroadcastss
; CHECK: ret
@@ -179,9 +188,170 @@ define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
%vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
%vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
%vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
- %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
- %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
- %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
- %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+ %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
+ %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
+ %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
+ %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
ret <8 x i8> %vecinit7.i
}
+
+
+define void @crash() nounwind alwaysinline {
+WGLoopsEntry:
+ br i1 undef, label %ret, label %footer329VF
+
+footer329VF:
+ %A.0.inVF = fmul float undef, 6.553600e+04
+ %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
+ %A.0VF = fptosi float %A.0.inVF to i32
+ %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
+ %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = and i32 %A.0VF, 65535
+ %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
+ %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
+ br i1 undef, label %preload1201VF, label %footer349VF
+
+preload1201VF:
+ br label %footer349VF
+
+footer349VF:
+ %2 = mul nsw <8 x i32> undef, %0
+ %3 = mul nsw <8 x i32> undef, %vector1099VF
+ br label %footer329VF
+
+ret:
+ ret void
+}
+
+; CHECK: _inreg0
+; CHECK: broadcastss
+; CHECK: ret
+define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
+ %in = insertelement <8 x i32> undef, i32 %scalar, i32 0 ; place the scalar argument in lane 0
+ %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero mask: every result lane reads lane 0, i.e. a splat of %scalar
+ ret <8 x i32> %wide
+}
+
+; CHECK: _inreg1
+; CHECK: broadcastss
+; CHECK: ret
+define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
+ %in = insertelement <8 x float> undef, float %scalar, i32 0
+ %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
+ ret <8 x float> %wide
+}
+
+; CHECK: _inreg2
+; CHECK: broadcastss
+; CHECK: ret
+define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
+ %in = insertelement <4 x float> undef, float %scalar, i32 0
+ %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %wide
+}
+
+; CHECK: _inreg3
+; CHECK: broadcastsd
+; CHECK: ret
+define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
+ %in = insertelement <4 x double> undef, double %scalar, i32 0
+ %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
+ ret <4 x double> %wide
+}
+
+;CHECK: _inreg8xfloat
+;CHECK: vbroadcastss
+;CHECK: ret
+define <8 x float> @_inreg8xfloat(<8 x float> %a) {
+ %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
+ ret <8 x float> %b
+}
+
+;CHECK: _inreg4xfloat
+;CHECK: vbroadcastss
+;CHECK: ret
+define <4 x float> @_inreg4xfloat(<4 x float> %a) {
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %b
+}
+
+;CHECK: _inreg16xi16
+;CHECK: vpbroadcastw
+;CHECK: ret
+define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
+ %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+ ret <16 x i16> %b
+}
+
+;CHECK: _inreg8xi16
+;CHECK: vpbroadcastw
+;CHECK: ret
+define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
+ %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %b
+}
+
+
+;CHECK: _inreg4xi64
+;CHECK: vpbroadcastq
+;CHECK: ret
+define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
+ %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+ ret <4 x i64> %b
+}
+
+;CHECK: _inreg2xi64
+;CHECK: vpbroadcastq
+;CHECK: ret
+define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
+ %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %b
+}
+
+;CHECK: _inreg4xdouble
+;CHECK: vbroadcastsd
+;CHECK: ret
+define <4 x double> @_inreg4xdouble(<4 x double> %a) {
+ %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
+ ret <4 x double> %b
+}
+
+;CHECK: _inreg2xdouble
+;CHECK: vpbroadcastq
+;CHECK: ret
+define <2 x double> @_inreg2xdouble(<2 x double> %a) {
+ %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %b
+}
+
+;CHECK: _inreg8xi32
+;CHECK: vpbroadcastd
+;CHECK: ret
+define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
+ %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+ ret <8 x i32> %b
+}
+
+;CHECK: _inreg4xi32
+;CHECK: vpbroadcastd
+;CHECK: ret
+define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %b
+}
+
+;CHECK: _inreg32xi8
+;CHECK: vpbroadcastb
+;CHECK: ret
+define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
+ %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %b
+}
+
+;CHECK: _inreg16xi8
+;CHECK: vpbroadcastb
+;CHECK: ret
+define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
+ %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %b
+}
diff --git a/test/CodeGen/X86/basic-promote-integers.ll b/test/CodeGen/X86/basic-promote-integers.ll
index c80f2b03343e..fce6b7f5565c 100644
--- a/test/CodeGen/X86/basic-promote-integers.ll
+++ b/test/CodeGen/X86/basic-promote-integers.ll
@@ -1,7 +1,7 @@
; Test that vectors are scalarized/lowered correctly
; (with both legalization methods).
-; RUN: llc -march=x86 -promote-elements < %s
-; RUN: llc -march=x86 < %s
+; RUN: llc -march=x86 < %s
+; RUN: llc -march=x86 < %s
; A simple test to check copyToParts and copyFromParts.
diff --git a/test/CodeGen/X86/bigstructret.ll b/test/CodeGen/X86/bigstructret.ll
index 633995d5d788..3c499fae820f 100644
--- a/test/CodeGen/X86/bigstructret.ll
+++ b/test/CodeGen/X86/bigstructret.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=x86 -o %t
-; RUN: grep "movl .24601, 12(%ecx)" %t
-; RUN: grep "movl .48, 8(%ecx)" %t
-; RUN: grep "movl .24, 4(%ecx)" %t
-; RUN: grep "movl .12, (%ecx)" %t
+; RUN: llc < %s -march=x86 | FileCheck %s
%0 = type { i32, i32, i32, i32 }
+%1 = type { i1, i1, i1, i32 }
-define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+; CHECK: ReturnBigStruct
+; CHECK: movl $24601, 12(%ecx)
+; CHECK: movl $48, 8(%ecx)
+; CHECK: movl $24, 4(%ecx)
+; CHECK: movl $12, (%ecx)
+
+define fastcc %0 @ReturnBigStruct() nounwind readnone {
entry:
%0 = insertvalue %0 zeroinitializer, i32 12, 0
%1 = insertvalue %0 %0, i32 24, 1
@@ -15,3 +18,17 @@ entry:
ret %0 %3
}
+; CHECK: ReturnBigStruct2
+; CHECK: movl $48, 4(%ecx)
+; CHECK: movb $1, 2(%ecx)
+; CHECK: movb $1, 1(%ecx)
+; CHECK: movb $0, (%ecx)
+; Returns { false, true, true, 48 } of struct type %1; the three i1 fields are expected as byte stores at offsets 0-2 and the i32 as a store at offset 4, all through %ecx.
+define fastcc %1 @ReturnBigStruct2() nounwind readnone {
+entry:
+ %0 = insertvalue %1 zeroinitializer, i1 false, 0
+ %1 = insertvalue %1 %0, i1 true, 1
+ %2 = insertvalue %1 %1, i1 true, 2
+ %3 = insertvalue %1 %2, i32 48, 3
+ ret %1 %3
+}
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 3a10c70ada85..11f811f8cf63 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
; In this test we check that sign-extend of the mask bit is performed by
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index fc7b6383b8b0..5534712af832 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -7,10 +7,15 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; that is not expected to run.
; CHECK: test_ifchains:
; CHECK: %entry
+; CHECK-NOT: .align
; CHECK: %else1
+; CHECK-NOT: .align
; CHECK: %else2
+; CHECK-NOT: .align
; CHECK: %else3
+; CHECK-NOT: .align
; CHECK: %else4
+; CHECK-NOT: .align
; CHECK: %exit
; CHECK: %then1
; CHECK: %then2
@@ -76,8 +81,11 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
; Check that we sink cold loop blocks after the hot loop body.
; CHECK: test_loop_cold_blocks:
; CHECK: %entry
+; CHECK-NOT: .align
; CHECK: %unlikely1
+; CHECK-NOT: .align
; CHECK: %unlikely2
+; CHECK: .align
; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
@@ -634,7 +642,7 @@ define void @test_unnatural_cfg_backwards_inner_loop() {
;
; CHECK: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
-; CHECK: %body
+; CHECK: [[BODY:# BB#[0-9]+]]:
; CHECK: %loop2b
; CHECK: %loop1
; CHECK: %loop2a
diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll
new file mode 100644
index 000000000000..0cb9fd9bc533
--- /dev/null
+++ b/test/CodeGen/X86/bool-simplify.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx | FileCheck %s
+
+define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) {
+ %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
+ %t2 = icmp ne i32 %t1, 0 ; boolean test of the ptestz result; no separate testl is expected between the ptest and the cmov
+ %t3 = select i1 %t2, i32 %a, i32 %b
+ ret i32 %t3
+; CHECK: foo
+; CHECK: ptest
+; CHECK-NOT: testl
+; CHECK: cmov
+; CHECK: ret
+}
+
+define i32 @bar(<2 x i64> %c) {
+entry:
+ %0 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
+ %1 = icmp ne i32 %0, 0
+ br i1 %1, label %if-true-block, label %endif-block ; branch on the ptestz result; no separate testl is expected between the ptest and the jne
+if-true-block: ; preds = %entry
+ ret i32 0
+endif-block: ; preds = %entry
+ ret i32 1
+; CHECK: bar
+; CHECK: ptest
+; CHECK-NOT: testl
+; CHECK: jne
+; CHECK: ret
+}
+
+define i32 @bax(<2 x i64> %c) {
+ %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
+ %t2 = icmp eq i32 %t1, 1 ; comparing the ptestz result with 1 is expected to need no cmpl after the ptest
+ %t3 = zext i1 %t2 to i32
+ ret i32 %t3
+; CHECK: bax
+; CHECK: ptest
+; CHECK-NOT: cmpl
+; CHECK: ret
+}
+
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll
index 2c3719493801..522346301162 100644
--- a/test/CodeGen/X86/br-fold.ll
+++ b/test/CodeGen/X86/br-fold.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=x86-64 < %s | FileCheck %s
; CHECK: orq
-; CHECK-NEXT: LBB0_1
+; CHECK-NEXT: %bb8.i329
@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1]
@_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1]
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index 93b20437e1e8..c94261467c9d 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,10 +1,12 @@
; Without list-burr scheduling we may not see the difference in codegen here.
-; RUN: llc < %s -march=x86-64 -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
-; RUN: grep {%xmm0} %t | count 14
-; RUN: not grep {%xmm1} %t
-; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t
-; RUN: grep {%xmm0} %t | count 7
-; RUN: grep {%xmm1} %t | count 7
+; Use a subtarget that has post-RA scheduling enabled because the anti-dependency
+; breaker requires liveness information to be kept.
+; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
+; RUN: grep "%xmm0" %t | count 14
+; RUN: not grep "%xmm1" %t
+; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t
+; RUN: grep "%xmm0" %t | count 7
+; RUN: grep "%xmm1" %t | count 7
define void @goo(double* %r, double* %p, double* %q) nounwind {
entry:
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
index 2dee5754256a..4d801891da5c 100644
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -mcpu=nehalem | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 -mcpu=nehalem | FileCheck %s
define double @t1(float* nocapture %x) nounwind readonly ssp {
entry:
@@ -34,8 +34,7 @@ entry:
define double @squirt(double* %x) nounwind {
entry:
; CHECK: squirt:
-; CHECK: movsd ([[A0]]), %xmm0
-; CHECK: sqrtsd %xmm0, %xmm0
+; CHECK: sqrtsd ([[A0]]), %xmm0
%z = load double* %x
%t = call double @llvm.sqrt.f64(double %z)
ret double %t
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 3857fb157905..38cda4d14040 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep {call.*12345678}
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep {call.*12345678}
-; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep "call.*12345678"
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep "call.*12345678"
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep "call.*12345678"
; Call to immediate is not safe on x86-64 unless we *know* that the
; call will be within 32-bits pcrel from the dest immediate.
-; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax}
+; RUN: llc < %s -march=x86-64 | grep "call.*\*%rax"
; PR3666
; PR3773
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
index 7420ce730475..8cdd59e9ae93 100644
--- a/test/CodeGen/X86/cfstring.ll
+++ b/test/CodeGen/X86/cfstring.ll
@@ -4,7 +4,7 @@
%0 = type opaque
%struct.NSConstantString = type { i32*, i32, i8*, i32 }
-; Make sure that the string ends up the the correct section.
+; Make sure that the string ends up in the correct section.
; CHECK: .section __TEXT,__cstring
; CHECK-NEXT: l_.str3:
diff --git a/test/CodeGen/X86/cmov-into-branch.ll b/test/CodeGen/X86/cmov-into-branch.ll
new file mode 100644
index 000000000000..780746ab1ae4
--- /dev/null
+++ b/test/CodeGen/X86/cmov-into-branch.ll
@@ -0,0 +1,63 @@
+; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s
+
+; cmp with single-use load, should not form cmov.
+define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) {
+ %load = load double* %b, align 8
+ %cmp = fcmp olt double %load, %a
+ %cond = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %cond
+; CHECK: test1:
+; CHECK: ucomisd
+; CHECK-NOT: cmov
+; CHECK: j
+; CHECK-NOT: cmov
+}
+
+; Sanity check: no load.
+define i32 @test2(double %a, double %b, i32 %x, i32 %y) {
+ %cmp = fcmp ogt double %a, %b
+ %cond = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %cond
+; CHECK: test2:
+; CHECK: ucomisd
+; CHECK: cmov
+}
+
+; Multiple uses of %a, should not form cmov.
+define i32 @test3(i32 %a, i32* nocapture %b, i32 %x) {
+ %load = load i32* %b, align 4
+ %cmp = icmp ult i32 %load, %a
+ %cond = select i1 %cmp, i32 %a, i32 %x
+ ret i32 %cond
+; CHECK: test3:
+; CHECK: cmpl
+; CHECK-NOT: cmov
+; CHECK: j
+; CHECK-NOT: cmov
+}
+
+; Multiple uses of the load.
+define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
+ %load = load i32* %b, align 4
+ %cmp = icmp ult i32 %load, %a
+ %cond = select i1 %cmp, i32 %x, i32 %y
+ %add = add i32 %cond, %load
+ ret i32 %add
+; CHECK: test4:
+; CHECK: cmpl
+; CHECK: cmov
+}
+
+; Multiple uses of the cmp.
+define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
+ %load = load i32* %b, align 4
+ %cmp = icmp ult i32 %load, %a
+ %cmp1 = icmp ugt i32 %load, %a
+ %cond = select i1 %cmp1, i32 %a, i32 %y
+ %cond5 = select i1 %cmp, i32 %cond, i32 %x
+ ret i32 %cond5
+; CHECK: test5:
+; CHECK: cmpl
+; CHECK: cmov
+; CHECK: cmov
+}
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 2e7ffbfd546d..ed25c82fddac 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-cgp-select2branch | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
entry:
; CHECK: test1:
-; CHECK: btl
-; CHECK-NEXT: movl $12, %eax
+; CHECK: movl $12, %eax
+; CHECK-NEXT: btl
; CHECK-NEXT: cmovael (%rcx), %eax
; CHECK-NEXT: ret
@@ -19,8 +19,8 @@ entry:
define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
entry:
; CHECK: test2:
-; CHECK: btl
-; CHECK-NEXT: movl $12, %eax
+; CHECK: movl $12, %eax
+; CHECK-NEXT: btl
; CHECK-NEXT: cmovbl (%rcx), %eax
; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index ef5e353e9f9f..eb06327f55a6 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -90,3 +90,64 @@ F:
; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
}
+; rdar://11866926
+define i32 @test7(i64 %res) nounwind {
+entry:
+; CHECK: test7:
+; CHECK-NOT: movabsq
+; CHECK: shrq $32, %rdi
+; CHECK: testq %rdi, %rdi
+; CHECK: sete
+ %lnot = icmp ult i64 %res, 4294967296 ; 4294967296 = 2^32, so the compare is expected as a zero test of (%res >> 32) rather than a movabsq of the constant
+ %lnot.ext = zext i1 %lnot to i32
+ ret i32 %lnot.ext
+}
+
+define i32 @test8(i64 %res) nounwind {
+entry:
+; CHECK: test8:
+; CHECK-NOT: movabsq
+; CHECK: shrq $32, %rdi
+; CHECK: cmpq $3, %rdi
+ %lnot = icmp ult i64 %res, 12884901888 ; 12884901888 = 3 * 2^32, so the compare is expected on (%res >> 32) against 3
+ %lnot.ext = zext i1 %lnot to i32
+ ret i32 %lnot.ext
+}
+
+define i32 @test9(i64 %res) nounwind {
+entry:
+; CHECK: test9:
+; CHECK-NOT: movabsq
+; CHECK: shrq $33, %rdi
+; CHECK: testq %rdi, %rdi
+; CHECK: sete
+ %lnot = icmp ult i64 %res, 8589934592 ; 8589934592 = 2^33, so the compare is expected as a zero test of (%res >> 33)
+ %lnot.ext = zext i1 %lnot to i32
+ ret i32 %lnot.ext
+}
+
+define i32 @test10(i64 %res) nounwind {
+entry:
+; CHECK: test10:
+; CHECK-NOT: movabsq
+; CHECK: shrq $32, %rdi
+; CHECK: testq %rdi, %rdi
+; CHECK: setne
+ %lnot = icmp uge i64 %res, 4294967296 ; 4294967296 = 2^32, so ">= 2^32" is expected as a non-zero test of (%res >> 32)
+ %lnot.ext = zext i1 %lnot to i32
+ ret i32 %lnot.ext
+}
+
+; rdar://9758774
+define i32 @test11(i64 %l) nounwind {
+entry:
+; CHECK: test11:
+; CHECK-NOT: movabsq
+; CHECK-NOT: andq
+; CHECK: shrq $47, %rdi
+; CHECK: cmpq $1, %rdi
+ %shr.mask = and i64 %l, -140737488355328 ; -(2^47): clears bits 0-46 and keeps bits 47-63
+ %cmp = icmp eq i64 %shr.mask, 140737488355328 ; 2^47, so the test is expected as a compare of (%l >> 47) with 1, with no andq or movabsq
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
index a5848763c98d..400437993879 100644
--- a/test/CodeGen/X86/coalesce-esp.ll
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {movl %esp, %ebp}
+; RUN: llc < %s | grep "movl %esp, %ebp"
; PR4572
; Don't coalesce with %esp if it would end up putting %esp in
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
index 6e5c1cf63006..e45437cc9484 100644
--- a/test/CodeGen/X86/coalescer-commute2.ll
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=nehalem | FileCheck %s
; CHECK-NOT: mov
; CHECK: paddw
; CHECK-NOT: mov
@@ -26,14 +26,3 @@ entry:
%tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp10
}
-
-
-; The coalescer should commute the add to avoid a copy.
-define <4 x float> @test3(<4 x float> %V) {
-entry:
- %tmp8 = shufflevector <4 x float> %V, <4 x float> undef,
- <4 x i32> < i32 3, i32 2, i32 1, i32 0 >
- %add = fadd <4 x float> %tmp8, %V
- ret <4 x float> %add
-}
-
diff --git a/test/CodeGen/X86/coalescer-dce2.ll b/test/CodeGen/X86/coalescer-dce2.ll
new file mode 100644
index 000000000000..bbbf09b267b9
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-dce2.ll
@@ -0,0 +1,118 @@
+; RUN: llc < %s -verify-coalescing
+; PR12911
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+@h = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@a = common global i16 0, align 2
+@e = common global i32 0, align 4
+
+define void @fn1() nounwind uwtable ssp {
+entry:
+ %0 = load i32* @d, align 4
+ %tobool72 = icmp eq i32 %0, 0
+ br i1 %tobool72, label %for.end32, label %for.cond1.preheader.lr.ph
+
+for.cond1.preheader.lr.ph: ; preds = %entry
+ %1 = load i32* @c, align 4
+ %tobool2 = icmp eq i32 %1, 0
+ %2 = load i32* @b, align 4
+ %cmp = icmp sgt i32 %2, 0
+ %conv = zext i1 %cmp to i32
+ %3 = load i32* @g, align 4
+ %tobool4 = icmp eq i32 %3, 0
+ %4 = load i16* @a, align 2
+ %tobool9 = icmp eq i16 %4, 0
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond25.loopexit.us-lcssa.us-lcssa, %if.end.us50, %if.end.us, %if.end.us.us, %for.cond1.preheader.lr.ph
+ %j.073 = phi i32 [ undef, %for.cond1.preheader.lr.ph ], [ %j.1.us.us, %if.end.us.us ], [ %j.1.us, %if.end.us ], [ %j.073, %for.cond25.loopexit.us-lcssa.us-lcssa ], [ %j.1.us36, %if.end.us50 ]
+ br i1 %tobool2, label %for.cond1.preheader.split.us, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+
+for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader
+ br i1 %tobool9, label %if.end.us50, label %for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge
+
+for.cond1.preheader.split.us: ; preds = %for.cond1.preheader
+ br i1 %tobool9, label %cond.end.us.us, label %cond.end.us
+
+cond.false18.us.us: ; preds = %if.end.us.us
+ %5 = load i32* @f, align 4
+ %sext76 = shl i32 %5, 16
+ %phitmp75 = ashr exact i32 %sext76, 16
+ br label %cond.end.us.us
+
+if.end.us.us: ; preds = %cond.end.us.us, %if.then.us.us
+ br i1 %tobool4, label %cond.false18.us.us, label %for.cond1.preheader
+
+if.then.us.us: ; preds = %cond.end.us.us
+ store i32 0, i32* @f, align 4
+ br label %if.end.us.us
+
+cond.end.us.us: ; preds = %cond.false18.us.us, %for.cond1.preheader.split.us
+ %j.1.us.us = phi i32 [ %j.073, %for.cond1.preheader.split.us ], [ %phitmp75, %cond.false18.us.us ]
+ store i32 %conv, i32* @h, align 4
+ br i1 %cmp, label %if.then.us.us, label %if.end.us.us
+
+cond.end21.us: ; preds = %land.lhs.true12.us, %cond.false18.us
+ %cond22.us = phi i16 [ %add.us, %cond.false18.us ], [ %4, %land.lhs.true12.us ]
+ %conv24.us = sext i16 %cond22.us to i32
+ br label %cond.end.us
+
+cond.false18.us: ; preds = %if.end6.us, %land.lhs.true12.us
+ %add.us = add i16 %4, %conv7.us
+ br label %cond.end21.us
+
+land.lhs.true12.us: ; preds = %if.end6.us
+ %conv10.us = sext i16 %conv7.us to i32
+ %sub.us = sub nsw i32 0, %conv10.us
+ %cmp14.us = icmp slt i32 %sub.us, 1
+ br i1 %cmp14.us, label %cond.end21.us, label %cond.false18.us
+
+if.end6.us: ; preds = %if.end.us
+ %6 = load i32* @f, align 4
+ %conv7.us = trunc i32 %6 to i16
+ %tobool11.us = icmp eq i16 %conv7.us, 0
+ br i1 %tobool11.us, label %cond.false18.us, label %land.lhs.true12.us
+
+if.end.us: ; preds = %cond.end.us, %if.then.us
+ br i1 %tobool4, label %if.end6.us, label %for.cond1.preheader
+
+if.then.us: ; preds = %cond.end.us
+ store i32 0, i32* @f, align 4
+ br label %if.end.us
+
+cond.end.us: ; preds = %cond.end21.us, %for.cond1.preheader.split.us
+ %j.1.us = phi i32 [ %conv24.us, %cond.end21.us ], [ %j.073, %for.cond1.preheader.split.us ]
+ store i32 %conv, i32* @h, align 4
+ br i1 %cmp, label %if.then.us, label %if.end.us
+
+for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge: ; preds = %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+ br i1 %tobool4, label %if.end6.us65, label %for.cond25.loopexit.us-lcssa.us-lcssa
+
+cond.false18.us40: ; preds = %if.end.us50
+ %7 = load i32* @f, align 4
+ %sext = shl i32 %7, 16
+ %phitmp = ashr exact i32 %sext, 16
+ br label %if.end.us50
+
+if.end.us50: ; preds = %cond.false18.us40, %for.cond1.preheader.for.cond1.preheader.split_crit_edge
+ %j.1.us36 = phi i32 [ %j.073, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %phitmp, %cond.false18.us40 ]
+ store i32 0, i32* @h, align 4
+ br i1 %tobool4, label %cond.false18.us40, label %for.cond1.preheader
+
+if.end6.us65: ; preds = %if.end6.us65, %for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge
+ store i32 0, i32* @h, align 4
+ br label %if.end6.us65
+
+for.cond25.loopexit.us-lcssa.us-lcssa: ; preds = %for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge
+ store i32 0, i32* @h, align 4
+ br label %for.cond1.preheader
+
+for.end32: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/coalescer-identity.ll b/test/CodeGen/X86/coalescer-identity.ll
new file mode 100644
index 000000000000..9c72ee6296bd
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-identity.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -verify-coalescing
+; PR12927
+target triple = "x86_64-apple-macosx10.8.0"
+
+; This is a case where removeCopyByCommutingDef() creates an identity copy that
+; joinCopy must then deal with correctly.
+
+@s = common global i16 0, align 2
+@g1 = common global i32 0, align 4
+@g2 = common global i32 0, align 4
+@g0 = common global i32 0, align 4
+
+define void @func() nounwind uwtable ssp {
+for.body.lr.ph:
+ %0 = load i32* @g2, align 4, !tbaa !0
+ %tobool6 = icmp eq i32 %0, 0
+ %s.promoted = load i16* @s, align 2
+ %.pre = load i32* @g1, align 4, !tbaa !0
+ br i1 %tobool6, label %for.body.us, label %for.body
+
+for.body.us: ; preds = %for.body.lr.ph, %for.inc.us
+ %1 = phi i32 [ %3, %for.inc.us ], [ %.pre, %for.body.lr.ph ]
+ %dec13.us = phi i16 [ %dec12.us, %for.inc.us ], [ %s.promoted, %for.body.lr.ph ]
+ %i.011.us = phi i32 [ %inc.us, %for.inc.us ], [ undef, %for.body.lr.ph ]
+ %v.010.us = phi i32 [ %phitmp.us, %for.inc.us ], [ 1, %for.body.lr.ph ]
+ %tobool1.us = icmp ne i32 %v.010.us, 0
+ %2 = zext i1 %tobool1.us to i16
+ %lnot.ext.us = xor i16 %2, 1
+ %add.us = add i16 %dec13.us, %lnot.ext.us
+ %conv3.us = zext i16 %add.us to i32
+ %add4.us = sub i32 0, %1
+ %tobool5.us = icmp eq i32 %conv3.us, %add4.us
+ br i1 %tobool5.us, label %for.inc.us, label %if.then7.us
+
+for.inc.us: ; preds = %cond.end.us, %for.body.us
+ %3 = phi i32 [ %1, %for.body.us ], [ %4, %cond.end.us ]
+ %dec12.us = phi i16 [ %add.us, %for.body.us ], [ %dec.us, %cond.end.us ]
+ %inc.us = add i32 %i.011.us, 1
+ %phitmp.us = udiv i32 %v.010.us, 12
+ %tobool.us = icmp eq i32 %inc.us, 0
+ br i1 %tobool.us, label %for.end, label %for.body.us
+
+cond.end.us: ; preds = %if.then7.us, %cond.false.us
+ %4 = phi i32 [ 0, %cond.false.us ], [ %1, %if.then7.us ]
+ %cond.us = phi i32 [ 0, %cond.false.us ], [ %v.010.us, %if.then7.us ]
+ store i32 %cond.us, i32* @g0, align 4, !tbaa !0
+ br label %for.inc.us
+
+cond.false.us: ; preds = %if.then7.us
+ store i32 0, i32* @g1, align 4, !tbaa !0
+ br label %cond.end.us
+
+if.then7.us: ; preds = %for.body.us
+ %dec.us = add i16 %add.us, -1
+ br i1 %tobool1.us, label %cond.end.us, label %cond.false.us
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %dec13 = phi i16 [ %dec12, %for.body ], [ %s.promoted, %for.body.lr.ph ]
+ %i.011 = phi i32 [ %inc, %for.body ], [ undef, %for.body.lr.ph ]
+ %v.010 = phi i32 [ %phitmp, %for.body ], [ 1, %for.body.lr.ph ]
+ %tobool1 = icmp eq i32 %v.010, 0
+ %lnot.ext = zext i1 %tobool1 to i16
+ %add = add i16 %dec13, %lnot.ext
+ %conv3 = zext i16 %add to i32
+ %add4 = sub i32 0, %.pre
+ %not.tobool5 = icmp ne i32 %conv3, %add4
+ %dec = sext i1 %not.tobool5 to i16
+ %dec12 = add i16 %add, %dec
+ %inc = add i32 %i.011, 1
+ %phitmp = udiv i32 %v.010, 12
+ %tobool = icmp eq i32 %inc, 0
+ br i1 %tobool, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc.us, %for.body
+ %dec12.lcssa = phi i16 [ %dec12.us, %for.inc.us ], [ %dec12, %for.body ]
+ store i16 %dec12.lcssa, i16* @s, align 2
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll
index f979945835ff..26318dd6c558 100644
--- a/test/CodeGen/X86/constant-pool-sharing.ll
+++ b/test/CodeGen/X86/constant-pool-sharing.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s
; llc should share constant pool entries between this integer vector
; and this floating-point vector since they have the same encoding.
diff --git a/test/CodeGen/X86/constructor.ll b/test/CodeGen/X86/constructor.ll
new file mode 100644
index 000000000000..b57889643e0d
--- /dev/null
+++ b/test/CodeGen/X86/constructor.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck --check-prefix=CTOR %s
+; RUN: llc -mtriple x86_64-pc-linux -use-init-array < %s | FileCheck --check-prefix=INIT-ARRAY %s
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }, { i32, void ()* } { i32 15, void ()* @g }]
+
+define void @f() {
+entry:
+ ret void
+}
+
+define void @g() {
+entry:
+ ret void
+}
+; f is registered with priority 65535 and g with priority 15; g is expected first, in a priority-suffixed section (.ctors.65520, i.e. 65535 - 15, or .init_array.15 with -use-init-array), followed by f in the unsuffixed section.
+; CTOR: .section .ctors.65520,"aw",@progbits
+; CTOR-NEXT: .align 8
+; CTOR-NEXT: .quad g
+; CTOR-NEXT: .section .ctors,"aw",@progbits
+; CTOR-NEXT: .align 8
+; CTOR-NEXT: .quad f
+
+; INIT-ARRAY: .section .init_array.15,"aw",@init_array
+; INIT-ARRAY-NEXT: .align 8
+; INIT-ARRAY-NEXT: .quad g
+; INIT-ARRAY-NEXT: .section .init_array,"aw",@init_array
+; INIT-ARRAY-NEXT: .align 8
+; INIT-ARRAY-NEXT: .quad f
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index b82348b32e43..064ee364d14e 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
-; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
; STATS: 9 asm-printer
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index cf6e27d15972..9badfc82e99c 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=x86 %s -o -
-; RUN: llc -march=x86-64 %s -o -
+; RUN: llc -march=x86 < %s -verify-machineinstrs
+; RUN: llc -march=x86-64 < %s -verify-machineinstrs
; PR6497
@@ -391,3 +391,54 @@ if.end:
%t11 = tail call i64 asm sideeffect "foo", "=*m,=A,{bx},{cx},1,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %t6, i32 0, i32 0, i64 0) nounwind
ret void
}
+
+; Avoid emitting wrong kill flags from InstrEmitter.
+; InstrEmitter::EmitSubregNode() may steal virtual registers from already
+; emitted blocks when isCoalescableExtInstr points out the opportunity.
+; Make sure kill flags are cleared on the virtual register that has newly become global.
+define i64 @ov_read(i8* %vf, i8* nocapture %buffer, i32 %length, i32 %bigendianp, i32 %word, i32 %sgned, i32* %bitstream) nounwind uwtable ssp {
+entry:
+ br i1 undef, label %return, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ br i1 undef, label %if.then3, label %if.end7
+
+if.then3: ; preds = %while.body.preheader
+ %0 = load i32* undef, align 4
+ br i1 undef, label %land.lhs.true.i255, label %if.end7
+
+land.lhs.true.i255: ; preds = %if.then3
+ br i1 undef, label %if.then.i256, label %if.end7
+
+if.then.i256: ; preds = %land.lhs.true.i255
+ %sub.i = sub i32 0, %0
+ %conv = sext i32 %sub.i to i64
+ br i1 undef, label %if.end7, label %while.end
+
+if.end7: ; preds = %if.then.i256, %land.lhs.true.i255, %if.then3, %while.body.preheader
+ unreachable
+
+while.end: ; preds = %if.then.i256
+ %cmp18 = icmp sgt i32 %sub.i, 0
+ %.conv = select i1 %cmp18, i64 -131, i64 %conv
+ ret i64 %.conv
+
+return: ; preds = %entry
+ ret i64 -131
+}
+
+; The tail call to a varargs function sets %AL.
+; uitofp expands to an FCMOV instruction which splits the basic block.
+; Make sure the live range of %AL isn't split.
+@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
+define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 {
+entry:
+ %x7 = load i64* %this, align 8
+ %sub = add i64 %x7, -1
+ %conv = uitofp i64 %sub to float
+ %div = fmul float %conv, 5.000000e-01
+ %conv2 = fpext float %div to double
+ tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
+ ret void
+}
+declare void @_Z6PrintFz(...)
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
index 6406cc73e412..0a3dfca228c1 100644
--- a/test/CodeGen/X86/ctpop-combine.ll
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index c3c7990d19eb..af69531246cf 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
entry:
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index c35935f015ac..d1e349f79d6f 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8"
;CHECK-NEXT: .short Lset
;CHECK-NEXT: Ltmp
;CHECK-NEXT: .byte 85 ## DW_OP_reg5
-;CHECK-NEXT: Ltmp5
+;CHECK-NEXT: Ltmp
;CHECK-NEXT: .quad 0
;CHECK-NEXT: .quad 0
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index 28d873bfba6f..6b16865ba9ee 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -1,5 +1,4 @@
; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin10 -regalloc=basic -join-physregs < %s | FileCheck %s
%struct.a = type { i32 }
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index e577ecb85aa8..8e7c13d8efa9 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -71,3 +71,24 @@ define i32 @test7(i32 %x) nounwind {
; CHECK-NOT: shrl
; CHECK: ret
}
+
+; PR13326
+define i8 @test8(i8 %x) nounwind {
+ %div = udiv i8 %x, 78
+ ret i8 %div
+; CHECK: test8:
+; CHECK: shrb %
+; CHECK: imull $211
+; CHECK: shrl $13
+; CHECK: ret
+}
+
+define i8 @test9(i8 %x) nounwind {
+ %div = udiv i8 %x, 116
+ ret i8 %div
+; CHECK: test9:
+; CHECK: shrb $2
+; CHECK: imull $71
+; CHECK: shrl $11
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
new file mode 100644
index 000000000000..c5e47facf346
--- /dev/null
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -0,0 +1,237 @@
+; RUN: llc < %s -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
+; rdar://11496434
+
+; no VLAs or dynamic alignment
+define i32 @t1() nounwind uwtable ssp {
+entry:
+ %a = alloca i32, align 4
+ call void @t1_helper(i32* %a) nounwind
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 13
+ ret i32 %add
+
+; CHECK: _t1
+; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
+; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
+; CHECK: callq _t1_helper
+; CHECK: movl [[OFFSET]](%rsp), %eax
+; CHECK: addl $13, %eax
+}
+
+declare void @t1_helper(i32*)
+
+; dynamic realignment
+define i32 @t2() nounwind uwtable ssp {
+entry:
+ %a = alloca i32, align 4
+ %v = alloca <8 x float>, align 32
+ call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 13
+ ret i32 %add
+
+; CHECK: _t2
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+;
+; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
+; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
+; CHECK: callq _t2_helper
+;
+; CHECK: movq %rbp, %rsp
+; CHECK: popq %rbp
+}
+
+declare void @t2_helper(i32*, <8 x float>*)
+
+; VLAs
+define i32 @t3(i64 %sz) nounwind uwtable ssp {
+entry:
+ %a = alloca i32, align 4
+ %vla = alloca i32, i64 %sz, align 16
+ call void @t3_helper(i32* %a, i32* %vla) nounwind
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 13
+ ret i32 %add
+
+; CHECK: _t3
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: pushq %rbx
+; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+;
+; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
+; CHECK: popq %rbx
+; CHECK: popq %rbp
+}
+
+declare void @t3_helper(i32*, i32*)
+
+; VLAs + Dynamic realignment
+define i32 @t4(i64 %sz) nounwind uwtable ssp {
+entry:
+ %a = alloca i32, align 4
+ %v = alloca <8 x float>, align 32
+ %vla = alloca i32, i64 %sz, align 16
+ call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 13
+ ret i32 %add
+
+; CHECK: _t4
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: pushq %r14
+; CHECK: pushq %rbx
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+; CHECK: movq %rsp, %rbx
+;
+; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
+; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
+; CHECK: callq _t4_helper
+;
+; CHECK: leaq -16(%rbp), %rsp
+; CHECK: popq %rbx
+; CHECK: popq %r14
+; CHECK: popq %rbp
+}
+
+declare void @t4_helper(i32*, i32*, <8 x float>*)
+
+; Dynamic realignment + Spill
+define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
+entry:
+ %a = alloca i32, align 4
+ %0 = bitcast float* %f to <8 x float>*
+ %1 = load <8 x float>* %0, align 32
+ call void @t5_helper1(i32* %a) nounwind
+ call void @t5_helper2(<8 x float> %1) nounwind
+ %2 = load i32* %a, align 4
+ %add = add nsw i32 %2, 13
+ ret i32 %add
+
+; CHECK: _t5
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+;
+; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
+; CHECK: vmovaps [[AVXREG]], (%rsp)
+; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK: callq _t5_helper1
+; CHECK: vmovaps (%rsp), %ymm0
+; CHECK: callq _t5_helper2
+; CHECK: movl {{[0-9]+}}(%rsp), %eax
+;
+; CHECK: movq %rbp, %rsp
+; CHECK: popq %rbp
+}
+
+declare void @t5_helper1(i32*)
+
+declare void @t5_helper2(<8 x float>)
+
+; VLAs + Dynamic realignment + Spill
+; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
+define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
+entry:
+; CHECK: _t6
+ %a = alloca i32, align 4
+ %0 = bitcast float* %f to <8 x float>*
+ %1 = load <8 x float>* %0, align 32
+ %vla = alloca i32, i64 %sz, align 16
+ call void @t6_helper1(i32* %a, i32* %vla) nounwind
+ call void @t6_helper2(<8 x float> %1) nounwind
+ %2 = load i32* %a, align 4
+ %add = add nsw i32 %2, 13
+ ret i32 %add
+}
+
+declare void @t6_helper1(i32*, i32*)
+
+declare void @t6_helper2(<8 x float>)
+
+; VLAs + Dynamic realignment + byval
+; The byval adjusts the sp after the prolog, but if we're restoring the sp from
+; the base pointer we use the original adjustment.
+%struct.struct_t = type { [5 x i32] }
+
+define void @t7(i32 %size, %struct.struct_t* byval align 8 %arg1) nounwind uwtable {
+entry:
+ %x = alloca i32, align 32
+ store i32 0, i32* %x, align 32
+ %0 = zext i32 %size to i64
+ %vla = alloca i32, i64 %0, align 16
+ %1 = load i32* %x, align 32
+ call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval align 8 %arg1)
+ ret void
+
+; CHECK: _t7
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: pushq %rbx
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
+; CHECK: movq %rsp, %rbx
+
+; Stack adjustment for byval
+; CHECK: subq {{.*}}, %rsp
+; CHECK: callq _bar
+; CHECK-NOT: addq {{.*}}, %rsp
+; CHECK: leaq -8(%rbp), %rsp
+; CHECK: popq %rbx
+; CHECK: popq %rbp
+}
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @bar(i32, i32*, %struct.struct_t* byval align 8)
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+
+; Test when forcing stack alignment
+define i32 @t8() nounwind uwtable {
+entry:
+ %a = alloca i32, align 4
+ call void @t1_helper(i32* %a) nounwind
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 13
+ ret i32 %add
+
+; FORCE-ALIGN: _t8
+; FORCE-ALIGN: movq %rsp, %rbp
+; FORCE-ALIGN: andq $-32, %rsp
+; FORCE-ALIGN-NEXT: subq $32, %rsp
+; FORCE-ALIGN: movq %rbp, %rsp
+; FORCE-ALIGN: popq %rbp
+}
+
+; VLAs
+define i32 @t9(i64 %sz) nounwind uwtable {
+entry:
+ %a = alloca i32, align 4
+ %vla = alloca i32, i64 %sz, align 16
+ call void @t3_helper(i32* %a, i32* %vla) nounwind
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 13
+ ret i32 %add
+
+; FORCE-ALIGN: _t9
+; FORCE-ALIGN: pushq %rbp
+; FORCE-ALIGN: movq %rsp, %rbp
+; FORCE-ALIGN: pushq %rbx
+; FORCE-ALIGN: andq $-32, %rsp
+; FORCE-ALIGN: subq $32, %rsp
+; FORCE-ALIGN: movq %rsp, %rbx
+
+; FORCE-ALIGN: leaq -8(%rbp), %rsp
+; FORCE-ALIGN: popq %rbx
+; FORCE-ALIGN: popq %rbp
+}
diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll
new file mode 100644
index 000000000000..7883ffabd565
--- /dev/null
+++ b/test/CodeGen/X86/early-ifcvt.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -enable-early-ifcvt -stress-early-ifcvt | FileCheck %s
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: mm2
+define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ br label %do.body
+
+; CHECK: do.body
+; Loop body has no branches before the backedge.
+; CHECK-NOT: LBB
+do.body:
+ %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ]
+ %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
+ %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
+ %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
+ %0 = load i32* %p.addr.0, align 4
+ %cmp = icmp sgt i32 %0, %max.0
+ br i1 %cmp, label %do.cond, label %if.else
+
+if.else:
+ %cmp1 = icmp slt i32 %0, %min.0
+ %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0
+ br label %do.cond
+
+do.cond:
+ %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
+ %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
+; CHECK: decl %esi
+; CHECK: jne LBB
+ %dec = add i32 %n.addr.0, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %do.end, label %do.body
+
+do.end:
+ %sub = sub nsw i32 %max.1, %min.1
+ ret i32 %sub
+}
+
+; CHECK: multipreds
+; Deal with alternative tail predecessors
+; CHECK-NOT: LBB
+; CHECK: cmov
+; CHECK-NOT: LBB
+; CHECK: cmov
+; CHECK-NOT: LBB
+; CHECK: fprintf
+
+define void @multipreds(i32 %sw) nounwind uwtable ssp {
+entry:
+ switch i32 %sw, label %if.then29 [
+ i32 0, label %if.then37
+ i32 127, label %if.end41
+ ]
+
+if.then29:
+ br label %if.end41
+
+if.then37:
+ br label %if.end41
+
+if.end41:
+ %exit_status.0 = phi i32 [ 2, %if.then29 ], [ 0, %if.then37 ], [ 66, %entry ]
+ call void (...)* @fprintf(i32 %exit_status.0) nounwind
+ unreachable
+}
+
+declare void @fprintf(...) nounwind
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 0f16a64ccd79..090680e48feb 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea
-; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl %ebp}
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+
+; CHECK-NOT: lea{{.*}}(%esp)
+; CHECK: {{(mov.* %ebp, %esp)|(lea.*\(%ebp\), %esp)}}
declare void @bar(<2 x i64>* %n)
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll
index 14778f097ef5..9e1a3754d0f0 100644
--- a/test/CodeGen/X86/extractps.ll
+++ b/test/CodeGen/X86/extractps.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mcpu=penryn > %t
; RUN: not grep movd %t
-; RUN: grep {movss %xmm} %t | count 1
-; RUN: grep {extractps \\\$1, %xmm0, } %t | count 1
+; RUN: grep "movss %xmm" %t | count 1
+; RUN: grep "extractps \$1, %xmm0, " %t | count 1
; PR2647
external global float, align 16 ; <float*>:0 [#uses=2]
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 9ded7e05dc46..af1867fc51cc 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,28 +1,54 @@
; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
-; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
-; RUN: count 2
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
-; RUN: grep fabs\$ | count 3
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse2,-sse3,-sse | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=UNSAFE
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -O0 | FileCheck %s --check-prefix=NOOPT
declare float @fabsf(float)
declare x86_fp80 @fabsl(x86_fp80)
+; CHECK: test1:
+; UNSAFE: test1:
+; NOOPT: test1:
define float @test1(float %X) {
- %Y = call float @fabsf(float %X)
+ %Y = call float @fabsf(float %X) readnone
ret float %Y
}
+; CHECK: {{^[ \t]+fabs$}}
+; UNSAFE: {{^[ \t]+fabs$}}
+; CHECK-NOT: fabs
+; UNSAFE-NOT: fabs
+; NOOPT-NOT: fabsf
+
+; CHECK: test2:
+; UNSAFE: test2:
+; NOOPT: test2:
define double @test2(double %X) {
%Y = fcmp oge double %X, -0.0
%Z = fsub double -0.0, %X
%Q = select i1 %Y, double %X, double %Z
ret double %Q
}
+; fabs is not used here.
+; CHECK-NOT: fabs
+; NOOPT-NOT: fabs
+
+; UNSAFE: {{^[ \t]+fabs$}}
+; UNSAFE-NOT: fabs
+
+; CHECK: test3:
+; UNSAFE: test3:
+; NOOPT: test3:
define x86_fp80 @test3(x86_fp80 %X) {
- %Y = call x86_fp80 @fabsl(x86_fp80 %X)
+ %Y = call x86_fp80 @fabsl(x86_fp80 %X) readnone
ret x86_fp80 %Y
}
+; CHECK: {{^[ \t]+fabs$}}
+; UNSAFE: {{^[ \t]+fabs$}}
+; NOOPT: {{^[ \t]+fabs$}}
-
+; CHECK-NOT: fabs
+; UNSAFE-NOT: fabs
+; NOOPT-NOT: fabs
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index e4982f054954..14cb136f89de 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep {add ESP, 8}
+; RUN: grep "add ESP, 8"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index 323c8533cec2..b3adb802a8c5 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel | grep {LCPI0_0(%rip)}
+; RUN: llc < %s -fast-isel | grep "LCPI0_0(%rip)"
; Make sure fast isel uses rip-relative addressing when required.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index 34f8b382522f..cb2464e746b1 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)}
+; RUN: llc < %s -fast-isel | grep "_kill@GOTPCREL(%rip)"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10.0"
@f = global i8 (...)* @kill ; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 8db1936bc20e..52b1e8564338 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=generic | FileCheck %s
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=atom | FileCheck -check-prefix=ATOM %s
@src = external global i32
@@ -18,6 +19,13 @@ entry:
; CHECK: movl %eax, (%ecx)
; CHECK: ret
+; ATOM: loadgv:
+; ATOM: movl L_src$non_lazy_ptr, %ecx
+; ATOM: movl (%ecx), %eax
+; ATOM: addl (%ecx), %eax
+; ATOM: movl %eax, (%ecx)
+; ATOM: ret
+
}
%stuff = type { i32 (...)** }
@@ -31,4 +39,8 @@ entry:
; CHECK: movl $0, %eax
; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
+; ATOM: _t:
+; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx
+; ATOM: movl $0, %eax
+
}
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index b9598bb465ce..19f38882a6c6 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -46,3 +46,17 @@ entry:
; CHECK: addl $40
}
declare void @test3sret(%struct.a* sret)
+
+; Check that fast-isel sret works with fastcc (and does not callee-pop): the caller's own stack restore must follow the call.
+define void @test4() nounwind ssp {
+entry:
+ %tmp = alloca %struct.a, align 8
+ call fastcc void @test4fastccsret(%struct.a* sret %tmp)
+ ret void
+; CHECK: test4:
+; CHECK: subl $28
+; CHECK: leal (%esp), %ecx
+; CHECK: calll _test4fastccsret
+; CHECK: addl $28
+}
+declare fastcc void @test4fastccsret(%struct.a* sret)
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index c88d52968dd8..132df2b0ab43 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10
+; RUN: llc < %s -fast-isel -fast-isel-abort -verify-machineinstrs -march=x86 -mattr=sse2
+; RUN: llc < %s -fast-isel -fast-isel-abort -verify-machineinstrs -mtriple=x86_64-apple-darwin10
; This tests very minimal fast-isel functionality.
@@ -117,3 +117,11 @@ define i64* @life() nounwind {
ret i64* %a3
}
+declare void @llvm.donothing() readnone
+
+; CHECK: donada
+define void @donada() nounwind {
+entry:
+ call void @llvm.donothing()
+ ret void
+}
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index 52b3e57b96bc..f1204d677a55 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
+; RUN: llc < %s -tailcallopt=false | grep "movl[[:space:]]*8(%esp), %eax" | count 2
; PR3122
; rdar://6400815
diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll
index 5deedb9dd9b1..b0c1d0a0dd1c 100644
--- a/test/CodeGen/X86/fma.ll
+++ b/test/CodeGen/X86/fma.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL
; CHECK: test_f32
-; CHECK: _fmaf
+; CHECK-FMA-INST: vfmadd213ss
+; CHECK-FMA-CALL: _fmaf
define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
entry:
@@ -11,7 +14,8 @@ entry:
}
; CHECK: test_f64
-; CHECK: _fma
+; CHECK-FMA-INST: vfmadd213sd
+; CHECK-FMA-CALL: _fma
define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
entry:
diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll
new file mode 100755
index 000000000000..90529e09d75b
--- /dev/null
+++ b/test/CodeGen/X86/fma3-intrinsics.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma | FileCheck %s
+
+define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fmadd213ss %xmm
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fmadd213ps
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: fmadd213ps {{.*\(%r.*}}, %ymm
+ %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fnmadd213ss %xmm
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fnmadd213ps
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: fnmadd213ps {{.*\(%r.*}}, %ymm
+ %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fmsub213ss
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fmsub213ps
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fnmsub213ss
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: fnmsub213ps
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+;;;;
+
+define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fmadd213sd
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fmadd213pd
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fnmadd213sd
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fnmadd213pd
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+
+
+define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fmsub213sd
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fmsub213pd
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fnmsub213sd
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: fnmsub213pd
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index 5ed03ef01f3c..fd414b346e2b 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -1,295 +1,295 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
; VFMADD
-define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfmaddss
- %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
; CHECK: vfmaddss (%{{.*}})
%x = load float *%a2
%y = insertelement <4 x float> undef, float %x, i32 0
- %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
; CHECK: vfmaddss %{{.*}}, (%{{.*}})
%x = load float *%a1
%y = insertelement <4 x float> undef, float %x, i32 0
- %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmaddsd
- %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
; CHECK: vfmaddsd (%{{.*}})
%x = load double *%a2
%y = insertelement <2 x double> undef, double %x, i32 0
- %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
%x = load double *%a1
%y = insertelement <2 x double> undef, double %x, i32 0
- %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfmaddps
- %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
; CHECK: vfmaddps (%{{.*}})
%x = load <4 x float>* %a2
- %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
; CHECK: vfmaddps %{{.*}}, (%{{.*}})
%x = load <4 x float>* %a1
- %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmaddpd
- %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
; CHECK: vfmaddpd (%{{.*}})
%x = load <2 x double>* %a2
- %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
%x = load <2 x double>* %a1
- %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
; CHECK: vfmaddps
; CHECK: ymm
- %res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
ret < 8 x float > %res
}
-declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
; CHECK: vfmaddpd
; CHECK: ymm
- %res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
ret < 4 x double > %res
}
-declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
; VFMSUB
-define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfmsubss
- %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmsubsd
- %res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfmsubps
- %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmsubpd
- %res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
; CHECK: vfmsubps
; CHECK: ymm
- %res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
ret < 8 x float > %res
}
-declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
; CHECK: vfmsubpd
; CHECK: ymm
- %res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
ret < 4 x double > %res
}
-declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
; VFNMADD
-define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfnmaddss
- %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfnmaddsd
- %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfnmaddps
- %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfnmaddpd
- %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
; CHECK: vfnmaddps
; CHECK: ymm
- %res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
ret < 8 x float > %res
}
-declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
; CHECK: vfnmaddpd
; CHECK: ymm
- %res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
ret < 4 x double > %res
}
-declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
; VFNMSUB
-define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfnmsubss
- %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfnmsubsd
- %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfnmsubps
- %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfnmsubpd
- %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
; CHECK: vfnmsubps
; CHECK: ymm
- %res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
ret < 8 x float > %res
}
-declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
; CHECK: vfnmsubpd
; CHECK: ymm
- %res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
ret < 4 x double > %res
}
-declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
; VFMADDSUB
-define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfmaddsubps
- %res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmaddsubpd
- %res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
; CHECK: vfmaddsubps
; CHECK: ymm
- %res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
ret < 8 x float > %res
}
-declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
; CHECK: vfmaddsubpd
; CHECK: ymm
- %res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
ret < 4 x double > %res
}
-declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
; VFMSUBADD
-define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+define < 4 x float > @test_x86_fma_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
; CHECK: vfmsubaddps
- %res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
ret < 4 x float > %res
}
-declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+define < 2 x double > @test_x86_fma_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmsubaddpd
- %res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
-declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+define < 8 x float > @test_x86_fma_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
; CHECK: vfmsubaddps
; CHECK: ymm
- %res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ %res = call < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
ret < 8 x float > %res
}
-declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+declare < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+define < 4 x double > @test_x86_fma_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
; CHECK: vfmsubaddpd
; CHECK: ymm
- %res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ %res = call < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
ret < 4 x double > %res
}
-declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+declare < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
new file mode 100644
index 000000000000..5d97a87b3bbf
--- /dev/null
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2,+fma -fp-contract=fast | FileCheck %s
+
+; CHECK: test_x86_fmadd_ps:
+; CHECK: vfmadd213ps %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; 4 x float: (a0 * a1) + a2
+ %x = fmul <4 x float> %a0, %a1
+ %res = fadd <4 x float> %x, %a2 ; separate multiply and add, expected to fuse under -fp-contract=fast
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps:
+; CHECK: vfmsub213ps %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; 4 x float: (a0 * a1) - a2
+ %x = fmul <4 x float> %a0, %a1
+ %res = fsub <4 x float> %x, %a2 ; product minus addend, expected to fuse under -fp-contract=fast
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps:
+; CHECK: vfnmadd213ps %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; 4 x float: a2 - (a0 * a1)
+ %x = fmul <4 x float> %a0, %a1
+ %res = fsub <4 x float> %a2, %x ; addend minus product, expected to fuse under -fp-contract=fast
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps:
+; CHECK: vfnmsub213ps %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; 4 x float: -(a0 * a1) - a2
+ %x = fmul <4 x float> %a0, %a1
+ %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x ; (-0.0 - x) negates the product
+ %res = fsub <4 x float> %y, %a2
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_fmadd_ps_y
+; CHECK: vfmadd213ps %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; 8 x float: (a0 * a1) + a2
+ %prod = fmul <8 x float> %a0, %a1
+ %sum = fadd <8 x float> %prod, %a2 ; separate multiply and add, expected to fuse
+ ret <8 x float> %sum
+}
+
+; CHECK: test_x86_fmsub_ps_y
+; CHECK: vfmsub213ps %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; 8 x float: (a0 * a1) - a2
+ %prod = fmul <8 x float> %a0, %a1
+ %diff = fsub <8 x float> %prod, %a2 ; product minus addend, expected to fuse
+ ret <8 x float> %diff
+}
+
+; CHECK: test_x86_fnmadd_ps_y
+; CHECK: vfnmadd213ps %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; 8 x float: a2 - (a0 * a1)
+ %prod = fmul <8 x float> %a0, %a1
+ %diff = fsub <8 x float> %a2, %prod ; addend minus product, expected to fuse
+ ret <8 x float> %diff
+}
+
+; CHECK: test_x86_fnmsub_ps_y
+; CHECK: vfnmsub213ps %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; 8 x float: -(a0 * a1) - a2
+ %prod = fmul <8 x float> %a0, %a1
+ %neg = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %prod ; (-0.0 - prod) negates the product
+ %diff = fsub <8 x float> %neg, %a2
+ ret <8 x float> %diff
+}
+
+; CHECK: test_x86_fmadd_pd_y
+; CHECK: vfmadd213pd %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; 4 x double: (a0 * a1) + a2
+ %prod = fmul <4 x double> %a0, %a1
+ %sum = fadd <4 x double> %prod, %a2 ; separate multiply and add, expected to fuse
+ ret <4 x double> %sum
+}
+
+; CHECK: test_x86_fmsub_pd_y
+; CHECK: vfmsub213pd %ymm2, %ymm0, %ymm1
+; CHECK: ret
+define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; 4 x double: (a0 * a1) - a2
+ %prod = fmul <4 x double> %a0, %a1
+ %diff = fsub <4 x double> %prod, %a2 ; product minus addend, expected to fuse
+ ret <4 x double> %diff
+}
+
+; CHECK: test_x86_fmsub_pd:
+; CHECK: vfmsub213pd %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; 2 x double: (a0 * a1) - a2
+ %x = fmul <2 x double> %a0, %a1
+ %res = fsub <2 x double> %x, %a2 ; product minus addend, expected to fuse under -fp-contract=fast
+ ret <2 x double> %res
+}
+
+; CHECK: test_x86_fnmadd_ss
+; CHECK: vfnmadd213ss %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { ; scalar float: a2 - (a0 * a1)
+ %prod = fmul float %a0, %a1
+ %diff = fsub float %a2, %prod ; addend minus product, expected to fuse
+ ret float %diff
+}
+
+; CHECK: test_x86_fnmadd_sd
+; CHECK: vfnmadd213sd %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { ; scalar double: a2 - (a0 * a1)
+ %prod = fmul double %a0, %a1
+ %diff = fsub double %a2, %prod ; addend minus product, expected to fuse
+ ret double %diff
+}
+
+; CHECK: test_x86_fmsub_sd
+; CHECK: vfmsub213sd %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { ; scalar double: (a0 * a1) - a2
+ %prod = fmul double %a0, %a1
+ %diff = fsub double %prod, %a2 ; product minus addend, expected to fuse
+ ret double %diff
+}
+
+; CHECK: test_x86_fnmsub_ss
+; CHECK: vfnmsub213ss %xmm2, %xmm0, %xmm1
+; CHECK: ret
+define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { ; scalar float: ((-a0) * a1) - a2
+ %neg = fsub float -0.000000e+00, %a0 ; (-0.0 - a0) negates the first factor before the multiply
+ %prod = fmul float %neg, %a1
+ %diff = fsub float %prod, %a2
+ ret float %diff
+}
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index e03cb7edb580..c961f7576f93 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -45,3 +45,29 @@ L:
}
+; rdar://10554090
+; xor in exit block will be CSE'ed and load will be folded to xor in entry.
+define i1 @test3(i32* %P, i32* %Q) nounwind {
+; CHECK: test3:
+; CHECK: movl 8(%esp), %eax
+; CHECK: xorl (%eax),
+; CHECK: j
+; CHECK-NOT: xor
+entry:
+ %0 = load i32* %P, align 4
+ %1 = load i32* %Q, align 4
+ %2 = xor i32 %0, %1
+ %3 = and i32 %2, 65535
+ %4 = icmp eq i32 %3, 0
+ br i1 %4, label %exit, label %land.end
+
+exit:
+ %shr.i.i19 = xor i32 %1, %0
+ %5 = and i32 %shr.i.i19, 2147418112
+ %6 = icmp eq i32 %5, 0
+ br label %land.end
+
+land.end:
+ %7 = phi i1 [ %6, %exit ], [ false, %entry ]
+ ret i1 %7
+}
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
index cc4198d7caf0..d850630a4d08 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-1.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -1,11 +1,16 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep pcmpeqd %t | count 1
-; RUN: grep xor %t | count 1
-; RUN: not grep LCP %t
+; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
define <2 x double> @foo() nounwind {
ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>)
+; CHECK: foo:
+; CHECK: pcmpeqd %xmm0, %xmm0
+; CHECK-NOT: %xmm
+; CHECK: ret
}
define <2 x double> @bar() nounwind {
ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>)
+; CHECK: bar:
+; CHECK: xorps %xmm0, %xmm0
+; CHECK-NOT: %xmm
+; CHECK: ret
}
diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll
new file mode 100644
index 000000000000..2ada194f891f
--- /dev/null
+++ b/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -0,0 +1,70 @@
+; This test is attempting to detect when we request forced re-alignment of the
+; stack to an alignment greater than would be available due to the ABI. We
+; arbitrarily force alignment up to 32-bytes for i386 hoping that this will
+; exceed any ABI provisions.
+;
+; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+define i32 @f(i8* %p) nounwind {
+entry:
+ %0 = load i8* %p
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i64 @g(i32 %i) nounwind {
+; CHECK: g:
+; CHECK: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: pushl
+; CHECK-NEXT: pushl
+; CHECK-NEXT: andl $-32, %esp
+; CHECK-NEXT: subl $32, %esp
+;
+; Now setup the base pointer (%esi).
+; CHECK-NEXT: movl %esp, %esi
+; CHECK-NOT: {{[^ ,]*}}, %esp
+;
+; The next adjustment of the stack is due to the alloca.
+; CHECK: movl %{{...}}, %esp
+; CHECK-NOT: {{[^ ,]*}}, %esp
+;
+; Next we set up the memset call, and then undo it.
+; CHECK: subl $32, %esp
+; CHECK-NOT: {{[^ ,]*}}, %esp
+; CHECK: calll memset
+; CHECK-NEXT: addl $32, %esp
+; CHECK-NOT: {{[^ ,]*}}, %esp
+;
+; Next we set up the call to 'f'.
+; CHECK: subl $32, %esp
+; CHECK-NOT: {{[^ ,]*}}, %esp
+; CHECK: calll f
+; CHECK-NEXT: addl $32, %esp
+; CHECK-NOT: {{[^ ,]*}}, %esp
+;
+; Restore %esp from %ebp (the frame pointer), offset by the size of the
+; callee-saved register area, so that those registers can be popped.
+; This is the state prior to stack realignment and the allocation of VLAs.
+; CHECK-NOT: popl
+; CHECK: leal -8(%ebp), %esp
+; CHECK-NEXT: popl
+; CHECK-NEXT: popl
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: ret
+
+entry:
+ br label %if.then
+
+if.then:
+ %0 = alloca i8, i32 %i
+ call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 %i, i32 1, i1 false)
+ %call = call i32 @f(i8* %0)
+ %conv = sext i32 %call to i64
+ ret i64 %conv
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
index cafc61a41ff2..62d81003a62d 100644
--- a/test/CodeGen/X86/fp-immediate-shorten.ll
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -1,7 +1,7 @@
;; Test that this FP immediate is stored in the constant pool as a float.
; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
-; RUN: grep {.long.1123418112}
+; RUN: grep ".long.1123418112"
define double @D() {
ret double 1.230000e+02
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 6966cf049789..1f5121d271c0 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-macosx -mcpu=yonah | FileCheck %s
; CHECK-NOT: {{((xor|and)ps|movd)}}
; These operations should be done in integer registers, eliminating constant
diff --git a/test/CodeGen/X86/fp-stack-compare-cmov.ll b/test/CodeGen/X86/fp-stack-compare-cmov.ll
new file mode 100644
index 000000000000..b457fbc1a332
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-compare-cmov.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
+; PR1012
+
+define float @foo(float* %col.2.0) {
+; CHECK: fucompi
+; CHECK: fcmov
+ %tmp = load float* %col.2.0
+ %tmp16 = fcmp olt float %tmp, 0.000000e+00
+ %tmp20 = fsub float -0.000000e+00, %tmp
+ %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp
+ ret float %iftmp.2.0
+}
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index f3998b67f672..a8557adeaf74 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,8 +1,11 @@
; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck %s
-; PR1012
+; PR6679
define float @foo(float* %col.2.0) {
-; CHECK: fucompi
+; CHECK: fucomp
+; CHECK-NOT: fucompi
+; CHECK: j
+; CHECK-NOT: fcmov
%tmp = load float* %col.2.0
%tmp16 = fcmp olt float %tmp, 0.000000e+00
%tmp20 = fsub float -0.000000e+00, %tmp
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
index 1307f70ead17..2733117a1f02 100644
--- a/test/CodeGen/X86/fp-stack-ret.ll
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -22,7 +22,7 @@ define fastcc double @test2(<2 x double> %A) {
; CHECK: test3
; CHECK: sub{{.*}}%esp
-; CHECLK-NOT: xmm
+; CHECK-NOT: xmm
define fastcc double @test3(<4 x float> %A) {
%B = bitcast <4 x float> %A to <2 x double>
%C = call fastcc double @test2(<2 x double> %B)
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index 0145069b8cd6..a2cea5e57f64 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul}
+; RUN: grep -i ST | not grep "fadd\|fsub\|fdiv\|fmul"
; Test that the load of the memory location is folded into the operation.
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index ff9b1b0b6a5a..1344cdcd4320 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,9 +1,17 @@
-; RUN: llc < %s -march=x86 >%t
-
-; RUN: grep {addl \\\$4,} %t | count 3
-; RUN: not grep {,%} %t
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
+; ATOM: foo
+; ATOM: addl
+; ATOM: leal
+; ATOM: leal
+
+; CHECK: foo
+; CHECK: addl
+; CHECK: addl
+; CHECK: addl
+
entry:
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
br i1 %0, label %bb, label %return
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index 4a6927f6a269..72a50961b2ff 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-win32 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s
; rdar://7398554
; When doing vector gather-scatter index calculation with 32-bit indices,
diff --git a/test/CodeGen/X86/gs-fold.ll b/test/CodeGen/X86/gs-fold.ll
new file mode 100644
index 000000000000..dbec76ba52c4
--- /dev/null
+++ b/test/CodeGen/X86/gs-fold.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd | FileCheck %s --check-prefix=CHECK-FBSD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK-LINUX
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.thread = type { i32, i32, i32, i32 }
+
+define i32 @test() nounwind uwtable {
+entry:
+ %0 = load volatile %struct.thread* addrspace(256)* null
+ %c = getelementptr inbounds %struct.thread* %0, i64 0, i32 2
+ %1 = load i32* %c, align 4
+ ret i32 %1
+}
+
+; Check that we are not assuming that gs contains the address of gs if we are not targeting Linux
+; CHECK-FBSD: movq %gs:0, %rax
+; CHECK-FBSD: movl 8(%rax), %eax
+; Check that we are assuming that gs contains the address of gs if we are targeting Linux
+; CHECK-LINUX: movl %gs:8, %eax
+
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
index 76ffd66524b9..968a9e88c0e9 100644
--- a/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {movzbl %\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86 | grep "movzbl %[abcd]h," | count 7
; Use h-register extract and zero-extend.
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
index 98817f3fb59f..a19fca555811 100644
--- a/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
+; RUN: llc < %s -march=x86-64 | grep "movzbl %[abcd]h," | count 7
; Use h-register extract and zero-extend.
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index 402cdfe413b5..903c4538aba7 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-linux > %t
-; RUN: grep {movzbl %\[abcd\]h,} %t | count 8
-; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d}
+; RUN: grep "movzbl %[abcd]h," %t | count 8
+; RUN: grep "%[abcd]h" %t | not grep "%r[[:digit:]]*d"
; LLVM creates virtual registers for values live across blocks
; based on the type of the value. Make sure that the extracts
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
index 4289fa7cc254..74ecd045b3d5 100644
--- a/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stats -O2 |& grep "1 machine-licm"
+; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7.2"
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
index a8ba0155fd10..9196cce1ae5a 100644
--- a/test/CodeGen/X86/iabs.ll
+++ b/test/CodeGen/X86/iabs.ll
@@ -1,13 +1,17 @@
-; RUN: llc < %s -march=x86-64 -stats |& \
-; RUN: grep {5 .*Number of machine instrs printed}
+; RUN: llc < %s -march=x86-64 | FileCheck %s
;; Integer absolute value, should produce something at least as good as:
-;; movl %edi, %ecx
-;; sarl $31, %ecx
-;; leal (%rdi,%rcx), %eax
-;; xorl %ecx, %eax
+;; movl %edi, %eax
+;; negl %eax
+;; cmovll %edi, %eax
;; ret
+; rdar://10695237
define i32 @test(i32 %a) nounwind {
+; CHECK: test:
+; CHECK: mov
+; CHECK-NEXT: neg
+; CHECK-NEXT: cmov
+; CHECK-NEXT: ret
%tmp1neg = sub i32 0, %a
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index cecf77af4de1..62a21f4c5aad 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm3, %xmm1}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd %xmm2, %xmm0}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm3, %xmm1}
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps %xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "mulpd %xmm3, %xmm1"
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "mulpd %xmm2, %xmm0"
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm3, %xmm1"
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm2, %xmm0"
define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
%y = fmul <4 x double> %x, %z
diff --git a/test/CodeGen/X86/inline-asm-error.ll b/test/CodeGen/X86/inline-asm-error.ll
index 134d6e952833..747a5891cf04 100644
--- a/test/CodeGen/X86/inline-asm-error.ll
+++ b/test/CodeGen/X86/inline-asm-error.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -march x86 -regalloc=fast < %s 2> %t1
+; RUN: not llc -march x86 -regalloc=fast -optimize-regalloc=0 < %s 2> %t1
; RUN: not llc -march x86 -regalloc=basic < %s 2> %t2
; RUN: not llc -march x86 -regalloc=greedy < %s 2> %t3
; RUN: FileCheck %s < %t1
diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll
index 5e76b6c0580e..b069c4631899 100644
--- a/test/CodeGen/X86/inline-asm-modifier-n.ll
+++ b/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep { 37}
+; RUN: llc < %s -march=x86 | grep " 37"
; rdar://7008959
define void @bork() nounwind {
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
index eef6c2f377a7..e6eb9efd8c78 100644
--- a/test/CodeGen/X86/inline-asm.ll
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -43,3 +43,12 @@ entry:
%0 = tail call i8 asm sideeffect "xchg $0, $1", "=r,*m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %p, i1 %desired) nounwind
ret void
}
+
+; <rdar://problem/11542429>
+; The constrained GR32_ABCD register class of the 'q' constraint requires
+; special handling after the preceding outputs used up eax-edx.
+define void @constrain_abcd(i8* %h) nounwind ssp {
+entry:
+ %0 = call { i32, i32, i32, i32, i32 } asm sideeffect "", "=&r,=&r,=&r,=&r,=&q,r,~{ecx},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %h) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/inreg.ll b/test/CodeGen/X86/inreg.ll
new file mode 100644
index 000000000000..6653cfb14ed8
--- /dev/null
+++ b/test/CodeGen/X86/inreg.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 | FileCheck --check-prefix=DAG %s
+; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 -O0 | FileCheck --check-prefix=FAST %s
+
+%struct.s1 = type { double, float }
+
+define void @g1() nounwind {
+entry:
+ %tmp = alloca %struct.s1, align 4
+ call void @f(%struct.s1* inreg sret %tmp, i32 inreg 41, i32 inreg 42, i32 43)
+ ret void
+ ; DAG: g1:
+ ; DAG: subl $[[AMT:.*]], %esp
+ ; DAG-NEXT: $43, (%esp)
+ ; DAG-NEXT: leal 16(%esp), %eax
+ ; DAG-NEXT: movl $41, %edx
+ ; DAG-NEXT: movl $42, %ecx
+ ; DAG-NEXT: calll f
+ ; DAG-NEXT: addl $[[AMT]], %esp
+ ; DAG-NEXT: ret
+
+ ; FAST: g1:
+ ; FAST: subl $[[AMT:.*]], %esp
+ ; FAST-NEXT: leal 8(%esp), %eax
+ ; FAST-NEXT: movl $41, %edx
+ ; FAST-NEXT: movl $42, %ecx
+ ; FAST: $43, (%esp)
+ ; FAST: calll f
+ ; FAST-NEXT: addl $[[AMT]], %esp
+ ; FAST: ret
+}
+
+declare void @f(%struct.s1* inreg sret, i32 inreg, i32 inreg, i32)
+
+%struct.s2 = type {}
+
+define void @g2(%struct.s2* inreg sret %agg.result) nounwind {
+entry:
+ ret void
+ ; DAG: g2
+ ; DAG-NOT: ret $4
+ ; DAG: .size g2
+
+ ; FAST: g2
+ ; FAST-NOT: ret $4
+ ; FAST: .size g2
+}
diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll
index 5ed0e00fd873..b162666362aa 100644
--- a/test/CodeGen/X86/isel-sink2.ll
+++ b/test/CodeGen/X86/isel-sink2.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 > %t
-; RUN: grep {movb.7(%...)} %t
+; RUN: grep "movb.7(%...)" %t
; RUN: not grep leal %t
define i8 @test(i32 *%P) nounwind {
diff --git a/test/CodeGen/X86/ispositive.ll b/test/CodeGen/X86/ispositive.ll
index 8adf723aabc3..b1d1a20c8eb6 100644
--- a/test/CodeGen/X86/ispositive.ll
+++ b/test/CodeGen/X86/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {shrl.*31}
+; RUN: llc < %s -march=x86 | grep "shrl.*31"
define i32 @test1(i32 %X) {
entry:
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index dbd133cd9ab4..48e21061d209 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -22,6 +22,7 @@ declare i32 @bar(...)
declare i32 @baz(...)
; rdar://10633221
+; rdar://11355268
define i32 @g(i32 %a, i32 %b) nounwind {
entry:
; CHECK: g:
@@ -32,3 +33,223 @@ entry:
%cond = select i1 %cmp, i32 %sub, i32 0
ret i32 %cond
}
+
+; rdar://10734411
+define i32 @h(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: h:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+ %cmp = icmp slt i32 %b, %a
+ %sub = sub nsw i32 %a, %b
+ %cond = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %cond
+}
+define i32 @i(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: i:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+ %cmp = icmp sgt i32 %a, %b
+ %sub = sub nsw i32 %a, %b
+ %cond = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %cond
+}
+define i32 @j(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: j:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+ %cmp = icmp ugt i32 %a, %b
+ %sub = sub i32 %a, %b
+ %cond = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %cond
+}
+define i32 @k(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: k:
+; CHECK-NOT: cmp
+; CHECK: cmov
+; CHECK-NOT: movl
+; CHECK: ret
+ %cmp = icmp ult i32 %b, %a
+ %sub = sub i32 %a, %b
+ %cond = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %cond
+}
+; redundant cmp instruction
+define i32 @l(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l:
+; CHECK-NOT: cmp
+ %cmp = icmp slt i32 %b, %a
+ %sub = sub nsw i32 %a, %b
+ %cond = select i1 %cmp, i32 %sub, i32 %a
+ ret i32 %cond
+}
+define i32 @m(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: m:
+; CHECK-NOT: cmp
+ %cmp = icmp sgt i32 %a, %b
+ %sub = sub nsw i32 %a, %b
+ %cond = select i1 %cmp, i32 %b, i32 %sub
+ ret i32 %cond
+}
+; If EFLAGS is live-out, we can't remove cmp if there exists
+; a swapped sub.
+define i32 @l2(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l2:
+; CHECK: cmp
+ %cmp = icmp eq i32 %b, %a
+ %sub = sub nsw i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %cmp2 = icmp sgt i32 %b, %a
+ %sel = select i1 %cmp2, i32 %sub, i32 %a
+ ret i32 %sel
+
+if.else:
+ ret i32 %sub
+}
+define i32 @l3(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l3:
+; CHECK: sub
+; CHECK-NOT: cmp
+; CHECK: jge
+ %cmp = icmp sgt i32 %b, %a
+ %sub = sub nsw i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ ret i32 %sub
+
+if.else:
+ %add = add nsw i32 %sub, 1
+ ret i32 %add
+}
+; rdar://11830760
+; When Movr0 is between sub and cmp, we need to move "Movr0" before sub.
+define i32 @l4(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: l4:
+; CHECK: xor
+; CHECK: sub
+; CHECK-NOT: cmp
+ %cmp = icmp sgt i32 %b, %a
+ %sub = sub i32 %a, %b
+ %.sub = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %.sub
+}
+; rdar://11540023
+define i32 @n(i32 %x, i32 %y) nounwind {
+entry:
+; CHECK: n:
+; CHECK-NOT: sub
+; CHECK: cmp
+ %sub = sub nsw i32 %x, %y
+ %cmp = icmp slt i32 %sub, 0
+ %y.x = select i1 %cmp, i32 %y, i32 %x
+ ret i32 %y.x
+}
+; PR13046
+define void @o() nounwind uwtable {
+entry:
+ %0 = load i16* undef, align 2
+ br i1 undef, label %if.then.i, label %if.end.i
+
+if.then.i: ; preds = %entry
+ unreachable
+
+if.end.i: ; preds = %entry
+ br i1 undef, label %sw.bb, label %sw.default
+
+sw.bb: ; preds = %if.end.i
+ br i1 undef, label %if.then44, label %if.end29
+
+if.end29: ; preds = %sw.bb
+; CHECK: o:
+; CHECK: cmp
+ %1 = urem i16 %0, 10
+ %cmp25 = icmp eq i16 %1, 0
+ %. = select i1 %cmp25, i16 2, i16 0
+ br i1 %cmp25, label %if.then44, label %sw.default
+
+sw.default: ; preds = %if.end29, %if.end.i
+ br i1 undef, label %if.then.i96, label %if.else.i97
+
+if.then.i96: ; preds = %sw.default
+ unreachable
+
+if.else.i97: ; preds = %sw.default
+ unreachable
+
+if.then44: ; preds = %if.end29, %sw.bb
+ %aModeRefSel.1.ph = phi i16 [ %., %if.end29 ], [ 3, %sw.bb ]
+ br i1 undef, label %if.then.i103, label %if.else.i104
+
+if.then.i103: ; preds = %if.then44
+ unreachable
+
+if.else.i104: ; preds = %if.then44
+ ret void
+}
+; rdar://11855129
+define i32 @p(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: p:
+; CHECK-NOT: test
+; CHECK: cmovs
+ %add = add nsw i32 %b, %a
+ %cmp = icmp sgt i32 %add, 0
+ %add. = select i1 %cmp, i32 %add, i32 0
+ ret i32 %add.
+}
+; PR13475
+; If we have sub a, b and cmp b, a and the result of cmp is used
+; by sbb, we should not optimize cmp away.
+define i32 @q(i32 %j.4, i32 %w, i32 %el) {
+; CHECK: q:
+; CHECK: sub
+; CHECK: cmp
+; CHECK-NEXT: sbb
+ %tmp532 = add i32 %j.4, %w
+ %tmp533 = icmp ugt i32 %tmp532, %el
+ %tmp534 = icmp ult i32 %w, %el
+ %or.cond = and i1 %tmp533, %tmp534
+ %tmp535 = sub i32 %el, %w
+ %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
+ ret i32 %j.5
+}
+; rdar://11873276
+define i8* @r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
+entry:
+; CHECK: r:
+; CHECK: sub
+; CHECK-NOT: cmp
+; CHECK: j
+; CHECK-NOT: sub
+; CHECK: ret
+ %0 = load i32* %offset, align 8
+ %cmp = icmp slt i32 %0, %size
+ br i1 %cmp, label %return, label %if.end
+
+if.end:
+ %sub = sub nsw i32 %0, %size
+ store i32 %sub, i32* %offset, align 8
+ %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+ br label %return
+
+return:
+ %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+ ret i8* %retval.0
+}
diff --git a/test/CodeGen/X86/label-redefinition.ll b/test/CodeGen/X86/label-redefinition.ll
index 9ad33e029766..9e88a18e8732 100644
--- a/test/CodeGen/X86/label-redefinition.ll
+++ b/test/CodeGen/X86/label-redefinition.ll
@@ -1,5 +1,5 @@
; PR7054
-; RUN: not llc %s -o - |& grep {'_foo' label emitted multiple times to assembly}
+; RUN: not llc %s -o - 2>&1 | grep "'_foo' label emitted multiple times to assembly"
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/large-global.ll b/test/CodeGen/X86/large-global.ll
new file mode 100644
index 000000000000..7cb974b21e73
--- /dev/null
+++ b/test/CodeGen/X86/large-global.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; rdar://11729134
+
+; EmitZerofill was incorrectly expecting a 32-bit "size" so 26214400000
+; was printed as 444596224
+
+%struct.X = type { [25000 x i8] }
+
+@gArray = global [1048576 x %struct.X] zeroinitializer, align 16
+
+; CHECK: .zerofill __DATA,__common,_gArray,26214400000,4
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index 69303507d6e6..43f69b0c6e93 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep {lea EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]}
+; RUN: grep "lea EAX, DWORD PTR \[... + 4\*... - 5\]"
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep add
diff --git a/test/CodeGen/X86/liveness-local-regalloc.ll b/test/CodeGen/X86/liveness-local-regalloc.ll
index b469d0837dc5..721f545985d7 100644
--- a/test/CodeGen/X86/liveness-local-regalloc.ll
+++ b/test/CodeGen/X86/liveness-local-regalloc.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O3 -regalloc=fast -mtriple=x86_64-apple-darwin10
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs -mtriple=x86_64-apple-darwin10
; <rdar://problem/7755473>
+; PR12821
%0 = type { i32, i8*, i8*, %1*, i8*, i64, i64, i32, i32, i32, i32, [1024 x i8] }
%1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 }
@@ -58,3 +59,34 @@ infloop: ; preds = %infloop, %bb3
infloop1: ; preds = %infloop1, %bb5
br label %infloop1
}
+
+
+; RAFast would forget to add a super-register <imp-def> when rewriting:
+; %vreg10:sub_32bit<def,read-undef> = COPY %R9D<kill>
+; This trips up the machine code verifier.
+define void @autogen_SD24657(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+ %A4 = alloca <16 x i16>
+ %A3 = alloca double
+ %A2 = alloca <2 x i8>
+ %A1 = alloca i1
+ %A = alloca i32
+ %L = load i8* %0
+ store i8 -37, i8* %0
+ %E = extractelement <4 x i64> zeroinitializer, i32 2
+ %Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+ %I = insertelement <2 x i8> <i8 -1, i8 -1>, i8 %5, i32 1
+ %B = fadd float 0x45CDF5B1C0000000, 0x45CDF5B1C0000000
+ %FC = uitofp i32 275048 to double
+ %Sl = select i1 true, <2 x i8> %I, <2 x i8> <i8 -1, i8 -1>
+ %Cmp = icmp slt i64 0, %E
+ br label %CF
+
+CF: ; preds = %BB
+ store i8 %5, i8* %0
+ store <2 x i8> %I, <2 x i8>* %A2
+ store i8 %5, i8* %0
+ store i8 %5, i8* %0
+ store i8 %5, i8* %0
+ ret void
+}
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index d14102fe245b..4bd162b45294 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -41,7 +41,6 @@ done:
; CHECK-NEXT: align
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: callq bar99
-; CHECK-NEXT: align
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: callq body
@@ -79,7 +78,6 @@ exit:
; CHECK-NEXT: .LBB2_5:
; CHECK-NEXT: callq block_a_true_func
; CHECK-NEXT: callq block_a_merge_func
-; CHECK-NEXT: align
; CHECK-NEXT: .LBB2_1:
; CHECK-NEXT: callq body
;
@@ -139,13 +137,13 @@ exit:
; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_7:
; CHECK-NEXT: callq bar100
-; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: callq loop_header
; CHECK: jl .LBB3_7
; CHECK: jge .LBB3_3
; CHECK-NEXT: callq bar101
; CHECK-NEXT: jmp .LBB3_1
+; CHECK-NEXT: align
; CHECK-NEXT: .LBB3_3:
; CHECK: jge .LBB3_4
; CHECK-NEXT: callq bar102
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index ebda9f201df9..8a81f70a8a2a 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,10 +1,16 @@
-; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin -mcpu=generic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
; CHECK: t:
; CHECK: decq
-; CHECK-NEXT: movl (
+; CHECK-NEXT: movl (%r9,%rax,4), %eax
; CHECK-NEXT: jne
+; ATOM: t:
+; ATOM: movl (%r9,%rax,4), %eax
+; ATOM-NEXT: decq
+; ATOM-NEXT: jne
+
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4]
@Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2]
@@ -149,6 +155,13 @@ bb2: ; preds = %bb
; CHECK: jne
; CHECK: ret
+; ATOM: f:
+; ATOM: %for.body
+; ATOM: incl [[IV:%e..]]
+; ATOM: cmpl $1, [[IV]]
+; ATOM: jne
+; ATOM: ret
+
define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
entry:
%cmp4 = icmp eq i32 %i, 1
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index 1f87089f80e7..276dab72f7cc 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=nehalem | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck %s
; Full strength reduction wouldn't reduce register pressure, so LSR should
; stick with indexing here.
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index c9ed3e553a46..6566f563784d 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck -check-prefix=ATOM %s
; CHECK: xorl %eax, %eax
; CHECK: movsd .LCPI0_0(%rip), %xmm0
@@ -9,6 +10,15 @@
; CHECK-NEXT: movsd
; CHECK-NEXT: incq %rax
+; ATOM: movsd .LCPI0_0(%rip), %xmm0
+; ATOM: xorl %eax, %eax
+; ATOM: align
+; ATOM-NEXT: BB0_2:
+; ATOM-NEXT: movsd A(,%rax,8)
+; ATOM-NEXT: mulsd
+; ATOM-NEXT: movsd
+; ATOM-NEXT: incq %rax
+
@A = external global [0 x double]
define void @foo(i64 %n) nounwind {
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index a757cde6abe9..d171fd5f1d9f 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -99,3 +99,60 @@ return: ; preds = %if.end, %entry
%retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
ret i32 %retval.0
}
+
+; rdar://11393714
+define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp {
+; CHECK: %entry
+; CHECK: xorl
+; CHECK: %preheader
+; CHECK: %do.body
+; CHECK-NOT: xorl
+; CHECK: %do.cond
+; CHECK-NOT: xorl
+; CHECK: %return
+entry:
+ %cmp = icmp eq i64 %n, 0
+ br i1 %cmp, label %return, label %preheader
+
+preheader:
+ %conv2 = and i32 %c, 255
+ br label %do.body
+
+do.body:
+ %n.addr.0 = phi i64 [ %dec, %do.cond ], [ %n, %preheader ]
+ %p.0 = phi i8* [ %incdec.ptr, %do.cond ], [ %s, %preheader ]
+ %cmp3 = icmp eq i32 %a, %conv2
+ br i1 %cmp3, label %return, label %do.cond
+
+do.cond:
+ %incdec.ptr = getelementptr inbounds i8* %p.0, i64 1
+ %dec = add i64 %n.addr.0, -1
+ %cmp6 = icmp eq i64 %dec, 0
+ br i1 %cmp6, label %return, label %do.body
+
+return:
+ %retval.0 = phi i8* [ null, %entry ], [ null, %do.cond ], [ %p.0, %do.body ]
+ ret i8* %retval.0
+}
+
+; PR13578
+@t2_global = external global i32
+
+declare i1 @t2_func()
+
+define i32 @t2() {
+ store i32 42, i32* @t2_global
+ %c = call i1 @t2_func()
+ br i1 %c, label %a, label %b
+
+a:
+ %l = load i32* @t2_global
+ ret i32 %l
+
+b:
+ ret i32 0
+
+; CHECK: t2:
+; CHECK: t2_global@GOTPCREL(%rip)
+; CHECK-NOT: t2_global@GOTPCREL(%rip)
+}
diff --git a/test/CodeGen/X86/mem-promote-integers.ll b/test/CodeGen/X86/mem-promote-integers.ll
index 80103d10388b..0015df0c1fac 100644
--- a/test/CodeGen/X86/mem-promote-integers.ll
+++ b/test/CodeGen/X86/mem-promote-integers.ll
@@ -1,8 +1,8 @@
; Test the basic functionality of integer element promotions of different types.
; This tests checks passing of arguments, loading and storing to memory and
; basic arithmetic.
-; RUN: llc -march=x86 -promote-elements < %s
-; RUN: llc -march=x86-64 -promote-elements < %s
+; RUN: llc -march=x86 < %s
+; RUN: llc -march=x86-64 < %s
define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
%bb = load <1 x i8>* %b
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index f4bc1bb7015a..723d1d89427e 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -disable-simplify-libcalls -mtriple=x86_64-linux | FileCheck %s --check-prefix=NOBUILTIN
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
; This tests codegen time inlining/optimization of memcmp
@@ -23,6 +24,8 @@ return: ; preds = %entry
; CHECK: memcmp2:
; CHECK: movw ([[A0:%rdi|%rcx]]), %ax
; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax
+; NOBUILTIN: memcmp2:
+; NOBUILTIN: callq
}
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 689f7bf59564..206cb33494cf 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -3,7 +3,7 @@
define void @bork(<1 x i64>* %x) {
; CHECK: bork
-; CHECK: pextrd
+; CHECK: movlpd
entry:
%tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1]
%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index aeb540fe4232..65ee7b1d8e00 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -55,4 +55,20 @@ entry:
; X64: ret
}
+; The loads of %tmp and %tmp2 look identical to the selection DAG except for
+; their address spaces (256 vs. 257). Make sure they aren't CSE'd.
+define i32 @test_no_cse() nounwind readonly {
+entry:
+ %tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
+ %tmp1 = load i32* %tmp ; <i32> [#uses=1]
+ %tmp2 = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1]
+ %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp4 = add i32 %tmp1, %tmp3 ; uses both loaded values, so neither load is dead
+ ret i32 %tmp4
+}
+; X32: test_no_cse:
+; X32: movl %gs:196
+; X32: movl %fs:196
+; X32: ret
+
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 4f7e28ace3cd..9f7d036cf141 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,9 +1,9 @@
-; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
+; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
; rdar://7236213
-
-; Xfailed now that scheduler 2-address hack is disabled a lea is generated.
-; The code isn't any worse though.
-; XFAIL: *
+;
+; The scheduler's 2-address hack has been disabled, so there is
+; currently no good guarantee that this test will pass until the
+; machine scheduler develops an equivalent heuristic.
; CodeGen shouldn't require any lea instructions inside the marked loop.
; It should properly set up post-increment uses and do coalescing for
diff --git a/test/CodeGen/X86/neg_cmp.ll b/test/CodeGen/X86/neg_cmp.ll
new file mode 100644
index 000000000000..866514ed9a2f
--- /dev/null
+++ b/test/CodeGen/X86/neg_cmp.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; rdar://11245199
+; PR12545
+define void @f(i32 %x, i32 %y) nounwind uwtable ssp {
+entry:
+; CHECK: f:
+; CHECK-NOT: neg
+; CHECK: add
+ %sub = sub i32 0, %y ; %sub = 0 - %y
+ %cmp = icmp eq i32 %x, %sub ; tests %x == -%y; the directives above expect an add and no neg
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @g() nounwind
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+declare void @g()
diff --git a/test/CodeGen/X86/opt-shuff-tstore.ll b/test/CodeGen/X86/opt-shuff-tstore.ll
index fc24913be529..3e720844c437 100644
--- a/test/CodeGen/X86/opt-shuff-tstore.ll
+++ b/test/CodeGen/X86/opt-shuff-tstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -promote-elements -mattr=+sse2,+sse41 | FileCheck %s
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -mattr=+sse2,+sse41 | FileCheck %s
; CHECK: func_4_8
; A single memory write
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index d185af16b90b..e987495f2c01 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -7,7 +7,7 @@
; Check that the shift gets turned into an LEA.
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: not grep {mov E.X, E.X}
+; RUN: not grep "mov E.X, E.X"
@G = external global i32 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/pass-three.ll b/test/CodeGen/X86/pass-three.ll
new file mode 100644
index 000000000000..23005c77c13d
--- /dev/null
+++ b/test/CodeGen/X86/pass-three.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.3.0"
+
+
+define { i8*, i64, i64* } @copy_3(i8* %a, i64 %b, i64* %c) nounwind {
+entry:
+ %0 = insertvalue { i8*, i64, i64* } undef, i8* %a, 0 ; field 0 = %a
+ %1 = insertvalue { i8*, i64, i64* } %0, i64 %b, 1 ; field 1 = %b
+ %2 = insertvalue { i8*, i64, i64* } %1, i64* %c, 2 ; field 2 = %c
+ ret { i8*, i64, i64* } %2 ; the three arguments are returned as one aggregate
+}
+
+; CHECK: copy_3:
+; CHECK-NOT: (%rdi)
+; CHECK: ret
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index d48a3318262c..f958b6b2c069 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {xorps %xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep "xorps %xmm0, %xmm0" | count 2
define float @foo(<4 x float> %a) {
%b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index ef02af2d7851..476bb1099831 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6
+; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
; PR1296
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/X86/phielim-split.ll b/test/CodeGen/X86/phielim-split.ll
new file mode 100644
index 000000000000..aa477359d60e
--- /dev/null
+++ b/test/CodeGen/X86/phielim-split.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+target triple = "x86_64-apple-macosx10.8.0"
+
+; The critical edge from for.cond to if.end2 should be split to avoid injecting
+; copies into the loop. The use of %b after the loop causes interference that
+; makes a copy necessary.
+; <rdar://problem/11561842>
+;
+; CHECK: split_loop_exit
+; CHECK: %for.cond
+; CHECK-NOT: mov
+; CHECK: je
+
+define i32 @split_loop_exit(i32 %a, i32 %b, i8* nocapture %p) nounwind uwtable readonly ssp {
+entry:
+ %cmp = icmp sgt i32 %a, 10
+ br i1 %cmp, label %for.cond, label %if.end2 ; the loop is entered only when %a > 10
+
+for.cond: ; preds = %entry, %for.cond
+ %p.addr.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %p, %entry ]
+ %incdec.ptr = getelementptr inbounds i8* %p.addr.0, i64 1
+ %0 = load i8* %p.addr.0, align 1 ; byte at the current pointer
+ %tobool = icmp eq i8 %0, 0
+ br i1 %tobool, label %for.cond, label %if.end2 ; keep looping while the loaded byte is zero
+
+if.end2: ; preds = %for.cond, %entry
+ %r.0 = phi i32 [ %a, %entry ], [ %b, %for.cond ] ; %a when the loop is skipped, %b when it exits from for.cond
+ %add = add nsw i32 %r.0, %b ; %b is used again after the loop
+ ret i32 %add
+}
diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll
index 8b9ea17c4e23..37eca1ce0a72 100644
--- a/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s
-; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s
-; CHECKed instructions should be the same with or without -O0.
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s
+; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling.
@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
@@ -15,6 +16,19 @@ entry:
; CHECK: movl %ebx, 40(%esp)
; CHECK-NOT: movl
; CHECK: addl %ebx, %eax
+
+; On Intel Atom the scheduler moves a movl instruction
+; used for the printf call to follow movl 24(%esp), %eax
+; ATOM: movl 24(%esp), %eax
+; ATOM: movl
+; ATOM: movl %eax, 36(%esp)
+; ATOM-NOT: movl
+; ATOM: movl 28(%esp), %ebx
+; ATOM-NOT: movl
+; ATOM: movl %ebx, 40(%esp)
+; ATOM-NOT: movl
+; ATOM: addl %ebx, %eax
+
%retval = alloca i32 ; <i32*> [#uses=2]
%"%ebx" = alloca i32 ; <i32*> [#uses=1]
%"%eax" = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index 4162015ea88e..984d7e57e0c6 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -1,10 +1,14 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -join-physregs | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; XFAIL: *
; rdar://5571034
; This requires physreg joining, %vreg13 is live everywhere:
; 304L %CL<def> = COPY %vreg13:sub_8bit; GR32_ABCD:%vreg13
; 320L %vreg15<def> = COPY %vreg19; GR32:%vreg15 GR32_NOSP:%vreg19
; 336L %vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15
+;
+; This test is XFAIL until the register allocator understands trivial physreg
+; interference. <rdar://9802098>
define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
; CHECK: foo:
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index d8ed4c097e00..da4af81959dc 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 -join-physregs > %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
; RUN: grep mov %t | count 11
-; The f() arguments in %xmm0 and %xmm1 cause an extra movdqa without -join-physregs.
-
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
ret <4 x i32> %A
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
index cc1df2fffcc5..800fbedb4f99 100644
--- a/test/CodeGen/X86/pointer-vector.ll
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -105,8 +105,7 @@ define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
entry:
%G = load <2 x i8*>* %p
;CHECK: movl
-;CHECK: movd
-;CHECK: pinsrd
+;CHECK: movsd
%T = bitcast <2 x i8*> %G to <2 x i32*>
;CHECK: ret
ret <2 x i32*> %T
diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll
index e1fa0326b762..6c32a2206a7e 100644
--- a/test/CodeGen/X86/pr11415.ll
+++ b/test/CodeGen/X86/pr11415.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast -optimize-regalloc=0 | FileCheck %s
; We used to consider the early clobber in the second asm statement as
; defining %0 before it was read. This caused us to omit the
diff --git a/test/CodeGen/X86/pr11468.ll b/test/CodeGen/X86/pr11468.ll
new file mode 100644
index 000000000000..f7e9adb4a211
--- /dev/null
+++ b/test/CodeGen/X86/pr11468.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
+; PR11468
+
+define void @f(i64 %sz) uwtable {
+entry:
+ %a = alloca i32, align 32
+ store volatile i32 0, i32* %a, align 32
+ ; Clobber r14 in the inline asm so that it has to be pushed on the stack.
+ call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
+ ret void
+
+; CHECK: _f
+; CHECK: pushq %rbp
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: movq %rsp, %rbp
+; CHECK: .cfi_def_cfa_register %rbp
+
+; The register is pushed on the stack first and only then is the stack
+; realigned, so that the .cfi_offset value is correct.
+; CHECK: pushq %r14
+; CHECK: andq $-32, %rsp
+; CHECK: .cfi_offset %r14, -24
+
+; Restore %rsp from %rbp and subtract the total size of the saved registers.
+; CHECK: leaq -8(%rbp), %rsp
+
+; Pop saved registers.
+; CHECK: popq %r14
+; CHECK: popq %rbp
+}
+
+!0 = metadata !{i32 125}
+
diff --git a/test/CodeGen/X86/pr12889.ll b/test/CodeGen/X86/pr12889.ll
new file mode 100644
index 000000000000..331d8f907d58
--- /dev/null
+++ b/test/CodeGen/X86/pr12889.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@c0 = common global i8 0, align 1
+
+define void @func() nounwind uwtable {
+entry:
+ %0 = load i8* @c0, align 1, !tbaa !0
+ %tobool = icmp ne i8 %0, 0
+ %conv = zext i1 %tobool to i8 ; 0 or 1
+ %storemerge = shl nuw nsw i8 %conv, %conv ; the same value is both the shifted operand and the shift amount
+ store i8 %storemerge, i8* @c0, align 1 ; result is written back to @c0
+ ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/pr13209.ll b/test/CodeGen/X86/pr13209.ll
new file mode 100644
index 000000000000..1c9316365959
--- /dev/null
+++ b/test/CodeGen/X86/pr13209.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; CHECK: pr13209:
+; CHECK-NOT: mov
+; CHECK: .size pr13209
+
+define zeroext i1 @pr13209(i8** %x, i8*** %jumpTable) nounwind {
+if.end51:
+ br label %indirectgoto.preheader
+indirectgoto.preheader:
+ %frombool.i5915.ph = phi i8 [ undef, %if.end51 ], [ %frombool.i5917, %jit_return ]
+ br label %indirectgoto
+do.end165:
+ %tmp92 = load i8** %x, align 8
+ br label %indirectgoto
+do.end209:
+ %tmp104 = load i8** %x, align 8
+ br label %indirectgoto
+do.end220:
+ %tmp107 = load i8** %x, align 8
+ br label %indirectgoto
+do.end231:
+ %tmp110 = load i8** %x, align 8
+ br label %indirectgoto
+do.end242:
+ %tmp113 = load i8** %x, align 8
+ br label %indirectgoto
+do.end253:
+ %tmp116 = load i8** %x, align 8
+ br label %indirectgoto
+do.end286:
+ %tmp125 = load i8** %x, align 8
+ br label %indirectgoto
+do.end297:
+ %tmp128 = load i8** %x, align 8
+ br label %indirectgoto
+do.end308:
+ %tmp131 = load i8** %x, align 8
+ br label %indirectgoto
+do.end429:
+ %tmp164 = load i8** %x, align 8
+ br label %indirectgoto
+do.end440:
+ %tmp167 = load i8** %x, align 8
+ br label %indirectgoto
+do.body482:
+ br i1 false, label %indirectgoto, label %do.body495
+do.body495:
+ br label %indirectgoto
+do.end723:
+ br label %inline_return
+inline_return:
+ %frombool.i5917 = phi i8 [ 0, %if.end5571 ], [ %frombool.i5915, %do.end723 ]
+ br label %jit_return
+jit_return:
+ br label %indirectgoto.preheader
+L_JSOP_UINT24:
+ %tmp864 = load i8** %x, align 8
+ br label %indirectgoto
+L_JSOP_THROWING:
+ %tmp1201 = load i8** %x, align 8
+ br label %indirectgoto
+do.body4936:
+ %tmp1240 = load i8** %x, align 8
+ br label %indirectgoto
+do.body5184:
+ %tmp1340 = load i8** %x, align 8
+ br label %indirectgoto
+if.end5571:
+ br label %inline_return
+indirectgoto:
+ %frombool.i5915 = phi i8 [ 0, %do.body495 ],[ 0, %do.body482 ] , [ %frombool.i5915, %do.body4936 ],[ %frombool.i5915, %do.body5184 ], [ %frombool.i5915, %L_JSOP_UINT24 ], [ %frombool.i5915, %do.end286 ], [ %frombool.i5915, %do.end297 ], [ %frombool.i5915, %do.end308 ], [ %frombool.i5915, %do.end429 ], [ %frombool.i5915, %do.end440 ], [ %frombool.i5915, %L_JSOP_THROWING ], [ %frombool.i5915, %do.end253 ], [ %frombool.i5915, %do.end242 ], [ %frombool.i5915, %do.end231 ], [ %frombool.i5915, %do.end220 ], [ %frombool.i5915, %do.end209 ],[ %frombool.i5915, %do.end165 ], [ %frombool.i5915.ph, %indirectgoto.preheader ]
+ indirectbr i8* null, [ label %if.end5571, label %do.end165, label %do.end209, label %do.end220, label %do.end231, label %do.end242, label %do.end253, label %do.end723, label %L_JSOP_THROWING, label %do.end440, label %do.end429, label %do.end308, label %do.end297, label %do.end286, label %L_JSOP_UINT24, label %do.body5184, label %do.body4936, label %do.body482]
+}
diff --git a/test/CodeGen/X86/pr13220.ll b/test/CodeGen/X86/pr13220.ll
new file mode 100644
index 000000000000..b9ac4b63ecf0
--- /dev/null
+++ b/test/CodeGen/X86/pr13220.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 < %s
+; PR13220
+
+define <8 x i32> @foo(<8 x i96> %x) {
+ %a = lshr <8 x i96> %x, <i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1> ; logical shift right of every i96 lane by 1
+ %b = trunc <8 x i96> %a to <8 x i32> ; keep the low 32 bits of each lane
+ ret <8 x i32> %b
+}
+
+define <8 x i32> @bar(<8 x i97> %x) {
+ %a = lshr <8 x i97> %x, <i97 1, i97 1, i97 1, i97 1, i97 1, i97 1, i97 1, i97 1> ; same shape as @foo but with i97 lanes
+ %b = trunc <8 x i97> %a to <8 x i32> ; keep the low 32 bits of each lane
+ ret <8 x i32> %b
+}
+
+define <8 x i32> @bax() {
+ %a = lshr <8 x i96> <i96 4, i96 4, i96 4, i96 4, i96 4, i96 4, i96 4, i96 4>, <i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1> ; both operands are constant vectors
+ %b = trunc <8 x i96> %a to <8 x i32> ; keep the low 32 bits of each lane
+ ret <8 x i32> %b
+}
diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll
new file mode 100644
index 000000000000..faaec262cb91
--- /dev/null
+++ b/test/CodeGen/X86/pr13577.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64
+
+define x86_fp80 @foo(x86_fp80 %a) {
+ %1 = tail call x86_fp80 @copysignl(x86_fp80 0xK7FFF8000000000000000, x86_fp80 %a) nounwind readnone ; constant first operand, %a as the second operand
+ ret x86_fp80 %1
+}
+
+declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index afd71143c458..f0e31f7f5fdc 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep "xorps.*sp" | count 1
; PR2656
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 112253038b79..d8f37781fc6e 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk}
+; RUN: llc < %s -march=x86 -stats 2>&1 | not grep "instructions sunk"
; PR3522
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/promote-trunc.ll b/test/CodeGen/X86/promote-trunc.ll
index 4211d82268d2..40a58b073924 100644
--- a/test/CodeGen/X86/promote-trunc.ll
+++ b/test/CodeGen/X86/promote-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -promote-elements < %s -march=x86-64
+; RUN: llc < %s -march=x86-64
define<4 x i8> @func_8_64() {
%F = load <4 x i64>* undef
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
index faca3d7bacdb..8ef9b5dec0d5 100644
--- a/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -177,3 +177,49 @@ if.end4:
return:
ret void
}
+
+; Deal with TokenFactor chain
+; rdar://11236106
+@foo = external global i64*, align 8
+
+define void @test3() nounwind ssp {
+entry:
+; CHECK: test3:
+; CHECK: decq 16(%rax)
+ %0 = load i64** @foo, align 8 ; pointer loaded from the global @foo
+ %arrayidx = getelementptr inbounds i64* %0, i64 2 ; element 2 of an i64 array, i.e. byte offset 16
+ %1 = load i64* %arrayidx, align 8
+ %dec = add i64 %1, -1 ; decrement the loaded value
+ store i64 %dec, i64* %arrayidx, align 8 ; write it back to the same address
+ %cmp = icmp eq i64 %dec, 0 ; the decremented value also feeds the branch
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @baz() nounwind
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare void @baz()
+
+; Avoid creating a cycle in the DAG which would trigger an assert in the
+; scheduler.
+; PR12565
+; rdar://11451474
+@x = external global i32, align 4
+@y = external global i32, align 4
+@z = external global i32, align 4
+
+define void @test4() nounwind uwtable ssp {
+entry:
+ %0 = load i32* @x, align 4 ; @x is loaded before the load/decrement/store of @y
+ %1 = load i32* @y, align 4
+ %dec = add nsw i32 %1, -1
+ store i32 %dec, i32* @y, align 4 ; decremented value written back to @y
+ %tobool.i = icmp ne i32 %dec, 0
+ %cond.i = select i1 %tobool.i, i32 %0, i32 0 ; depends on both the decrement result and the earlier load of @x
+ store i32 %cond.i, i32* @z, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll
new file mode 100644
index 000000000000..e2224a619676
--- /dev/null
+++ b/test/CodeGen/X86/rdrand.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdrand | FileCheck %s
+declare {i16, i32} @llvm.x86.rdrand.16()
+declare {i32, i32} @llvm.x86.rdrand.32()
+declare {i64, i32} @llvm.x86.rdrand.64()
+
+define i32 @_rdrand16_step(i16* %random_val) {
+ %call = call {i16, i32} @llvm.x86.rdrand.16()
+ %randval = extractvalue {i16, i32} %call, 0 ; field 0 is stored through %random_val
+ store i16 %randval, i16* %random_val
+ %isvalid = extractvalue {i16, i32} %call, 1 ; field 1 is the i32 returned to the caller
+ ret i32 %isvalid
+; CHECK: _rdrand16_step:
+; CHECK: rdrandw %ax
+; CHECK: movw %ax, (%r[[A0:di|cx]])
+; CHECK: movzwl %ax, %ecx
+; CHECK: movl $1, %eax
+; CHECK: cmovael %ecx, %eax
+; CHECK: ret
+}
+
+define i32 @_rdrand32_step(i32* %random_val) {
+ %call = call {i32, i32} @llvm.x86.rdrand.32()
+ %randval = extractvalue {i32, i32} %call, 0 ; field 0 is stored through %random_val
+ store i32 %randval, i32* %random_val
+ %isvalid = extractvalue {i32, i32} %call, 1 ; field 1 is the i32 returned to the caller
+ ret i32 %isvalid
+; CHECK: _rdrand32_step:
+; CHECK: rdrandl %e[[T0:[a-z]+]]
+; CHECK: movl %e[[T0]], (%r[[A0]])
+; CHECK: movl $1, %eax
+; CHECK: cmovael %e[[T0]], %eax
+; CHECK: ret
+}
+
+define i32 @_rdrand64_step(i64* %random_val) {
+ %call = call {i64, i32} @llvm.x86.rdrand.64()
+ %randval = extractvalue {i64, i32} %call, 0 ; field 0 is stored through %random_val
+ store i64 %randval, i64* %random_val
+ %isvalid = extractvalue {i64, i32} %call, 1 ; field 1 is the i32 returned to the caller
+ ret i32 %isvalid
+; CHECK: _rdrand64_step:
+; CHECK: rdrandq %r[[T1:[a-z]+]]
+; CHECK: movq %r[[T1]], (%r[[A0]])
+; CHECK: movl $1, %eax
+; CHECK: cmovael %e[[T1]], %eax
+; CHECK: ret
+}
+
+; Check that MachineCSE doesn't eliminate duplicate rdrand instructions.
+define i32 @CSE() nounwind {
+ %rand1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
+ %v1 = extractvalue { i32, i32 } %rand1, 0
+ %rand2 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind ; second, textually identical call
+ %v2 = extractvalue { i32, i32 } %rand2, 0
+ %add = add i32 %v2, %v1 ; both results are used, so neither call is dead
+ ret i32 %add
+; CHECK: CSE:
+; CHECK: rdrandl
+; CHECK: rdrandl
+}
+
+; Check that MachineLICM doesn't hoist rdrand instructions.
+define void @loop(i32* %p, i32 %n) nounwind {
+entry:
+ %tobool1 = icmp eq i32 %n, 0
+ br i1 %tobool1, label %while.end, label %while.body ; skip the loop entirely when %n is zero
+
+while.body: ; preds = %entry, %while.body
+ %p.addr.03 = phi i32* [ %incdec.ptr, %while.body ], [ %p, %entry ]
+ %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+ %dec = add nsw i32 %n.addr.02, -1
+ %incdec.ptr = getelementptr inbounds i32* %p.addr.03, i64 1
+ %rand = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind ; call takes no operands and sits inside the loop body
+ %v1 = extractvalue { i32, i32 } %rand, 0
+ store i32 %v1, i32* %p.addr.03, align 4 ; one value is stored per iteration
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ ret void
+; CHECK: loop:
+; CHECK-NOT: rdrandl
+; CHECK: This Inner Loop Header: Depth=1
+; CHECK: rdrandl
+}
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
index e0b5f7a870bb..52d7b56f182e 100644
--- a/test/CodeGen/X86/regpressure.ll
+++ b/test/CodeGen/X86/regpressure.ll
@@ -1,8 +1,8 @@
;; Both functions in this testcase should codegen to the same function, and
;; neither of them should require spilling anything to the stack.
-; RUN: llc < %s -march=x86 -stats |& \
-; RUN: not grep {Number of register spills}
+; RUN: llc < %s -march=x86 -stats 2>&1 | \
+; RUN: not grep "Number of register spills"
;; This can be compiled to use three registers if the loads are not
;; folded into the multiplies, 2 registers otherwise.
diff --git a/test/CodeGen/X86/remat-fold-load.ll b/test/CodeGen/X86/remat-fold-load.ll
new file mode 100644
index 000000000000..de77ad375672
--- /dev/null
+++ b/test/CodeGen/X86/remat-fold-load.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -disable-fp-elim -verify-coalescing
+; PR13414
+;
+; During coalescing, remat triggers DCE which deletes the penultimate use of a
+; load. This load should not be folded into the remaining use because it is not
+; safe to move, and it would extend the live range of the address.
+;
+; LiveRangeEdit::foldAsLoad() doesn't extend live ranges, so -verify-coalescing
+; catches the problem.
+
+target triple = "i386-unknown-linux-gnu"
+
+%type_a = type { %type_a*, %type_b }
+%type_b = type { %type_c, i32 }
+%type_c = type { i32, %type_d }
+%type_d = type { i64 }
+%type_e = type { %type_c, i64 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define linkonce_odr void @test() nounwind {
+entry:
+ br i1 undef, label %while.end.while.end26_crit_edge, label %while.body12.lr.ph
+
+while.end.while.end26_crit_edge: ; preds = %entry
+ br label %while.end26
+
+while.body12.lr.ph: ; preds = %entry
+ br label %while.body12
+
+while.body12: ; preds = %if.end24, %while.body12.lr.ph
+ %tmp = phi %type_a* [ undef, %while.body12.lr.ph ], [ %tmp18, %if.end24 ]
+ %ins151154161 = phi i128 [ 0, %while.body12.lr.ph ], [ %phitmp, %if.end24 ]
+ %ins135156160 = phi i128 [ 0, %while.body12.lr.ph ], [ %phitmp158, %if.end24 ]
+ %ins151 = or i128 0, %ins151154161
+ %cmp.i.i.i.i.i67 = icmp sgt i32 undef, 8
+ br i1 %cmp.i.i.i.i.i67, label %if.then.i.i.i.i71, label %if.else.i.i.i.i74
+
+if.then.i.i.i.i71: ; preds = %while.body12
+ %call4.i.i.i.i68 = call noalias i8* @malloc(i32 undef) nounwind
+ %tmp1 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1
+ %buf_6.i.i.i.i70 = bitcast %type_d* %tmp1 to i8**
+ %tmp2 = load i8** %buf_6.i.i.i.i70, align 4
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %tmp2, i32 undef, i32 1, i1 false) nounwind
+ unreachable
+
+if.else.i.i.i.i74: ; preds = %while.body12
+ %i_.i.i.i.i72 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1, i32 0
+ %tmp3 = load i64* %i_.i.i.i.i72, align 4
+ %tmp4 = zext i64 %tmp3 to i128
+ %tmp5 = shl nuw nsw i128 %tmp4, 32
+ %ins148 = or i128 %tmp5, %ins151
+ %second3.i.i76 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 1
+ %tmp6 = load i32* %second3.i.i76, align 4
+ %tmp7 = zext i32 %tmp6 to i128
+ %tmp8 = shl nuw i128 %tmp7, 96
+ %mask144 = and i128 %ins148, 79228162495817593519834398720
+ %tmp9 = load %type_e** undef, align 4
+ %len_.i.i.i.i86 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 0
+ %tmp10 = load i32* %len_.i.i.i.i86, align 4
+ %tmp11 = zext i32 %tmp10 to i128
+ %ins135 = or i128 %tmp11, %ins135156160
+ %cmp.i.i.i.i.i88 = icmp sgt i32 %tmp10, 8
+ br i1 %cmp.i.i.i.i.i88, label %if.then.i.i.i.i92, label %if.else.i.i.i.i95
+
+if.then.i.i.i.i92: ; preds = %if.else.i.i.i.i74
+ %call4.i.i.i.i89 = call noalias i8* @malloc(i32 %tmp10) nounwind
+ %ins126 = or i128 0, %ins135
+ %tmp12 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1
+ %buf_6.i.i.i.i91 = bitcast %type_d* %tmp12 to i8**
+ %tmp13 = load i8** %buf_6.i.i.i.i91, align 4
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call4.i.i.i.i89, i8* %tmp13, i32 %tmp10, i32 1, i1 false) nounwind
+ br label %A
+
+if.else.i.i.i.i95: ; preds = %if.else.i.i.i.i74
+ %i_.i.i.i.i93 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1, i32 0
+ br label %A
+
+A: ; preds = %if.else.i.i.i.i95, %if.then.i.i.i.i92
+ %ins135157 = phi i128 [ %ins126, %if.then.i.i.i.i92 ], [ undef, %if.else.i.i.i.i95 ]
+ %second3.i.i97 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 1
+ %tmp14 = load i64* %second3.i.i97, align 4
+ %tmp15 = trunc i64 %tmp14 to i32
+ %cmp.i99 = icmp sgt i32 %tmp6, %tmp15
+ %tmp16 = trunc i128 %ins135157 to i32
+ %cmp.i.i.i.i.i.i101 = icmp sgt i32 %tmp16, 8
+ br i1 %cmp.i.i.i.i.i.i101, label %if.then.i.i.i.i.i103, label %B
+
+if.then.i.i.i.i.i103: ; preds = %A
+ unreachable
+
+B: ; preds = %A
+ %tmp17 = trunc i128 %ins148 to i32
+ %cmp.i.i.i.i.i.i83 = icmp sgt i32 %tmp17, 8
+ br i1 %cmp.i.i.i.i.i.i83, label %if.then.i.i.i.i.i85, label %C
+
+if.then.i.i.i.i.i85: ; preds = %B
+ unreachable
+
+C: ; preds = %B
+ br i1 %cmp.i99, label %if.then17, label %if.end24
+
+if.then17: ; preds = %C
+ br i1 false, label %if.then.i.i.i.i.i43, label %D
+
+if.then.i.i.i.i.i43: ; preds = %if.then17
+ unreachable
+
+D: ; preds = %if.then17
+ br i1 undef, label %if.then.i.i.i.i.i, label %E
+
+if.then.i.i.i.i.i: ; preds = %D
+ unreachable
+
+E: ; preds = %D
+ br label %if.end24
+
+if.end24: ; preds = %E, %C
+ %phitmp = or i128 %tmp8, %mask144
+ %phitmp158 = or i128 undef, undef
+ %tmp18 = load %type_a** undef, align 4
+ %tmp19 = load %type_a** undef, align 4
+ %cmp.i49 = icmp eq %type_a* %tmp18, %tmp19
+ br i1 %cmp.i49, label %while.cond10.while.end26_crit_edge, label %while.body12
+
+while.cond10.while.end26_crit_edge: ; preds = %if.end24
+ %.pre = load %type_e** undef, align 4
+ br label %while.end26
+
+while.end26: ; preds = %while.cond10.while.end26_crit_edge, %while.end.while.end26_crit_edge
+ br i1 undef, label %while.body.lr.ph.i, label %F
+
+while.body.lr.ph.i: ; preds = %while.end26
+ br label %while.body.i
+
+while.body.i: ; preds = %while.body.i, %while.body.lr.ph.i
+ br i1 false, label %while.body.i, label %F
+
+F: ; preds = %while.body.i, %while.end26
+ ret void
+}
+
+declare noalias i8* @malloc(i32) nounwind
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index 75f438d26cd0..f6095a75561c 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
; RUN: not grep xor %t
; RUN: not grep movap %t
-; RUN: grep {\\.quad.*0} %t
+; RUN: grep "\.quad.*0" %t
; Remat should be able to fold the zero constant into the div instructions
; as a constant-pool load.
diff --git a/test/CodeGen/X86/reverse_branches.ll b/test/CodeGen/X86/reverse_branches.ll
new file mode 100644
index 000000000000..97721250377e
--- /dev/null
+++ b/test/CodeGen/X86/reverse_branches.ll
@@ -0,0 +1,104 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+@.str2 = private unnamed_addr constant [7 x i8] c"memchr\00", align 1
+@.str3 = private unnamed_addr constant [11 x i8] c"bsd_memchr\00", align 1
+@str4 = private unnamed_addr constant [5 x i8] c"Bug!\00"
+
+; Make sure that at the end of do.cond.i we jump to do.body.i first, to get a
+; tighter inner loop.
+define i32 @test_branches_order() uwtable ssp {
+; CHECK: test_branches_order:
+; CHECK: [[L0:LBB0_[0-9]+]]: ## %do.body.i
+; CHECK: je
+; CHECK: %do.cond.i
+; CHECK: jne [[L0]]
+; CHECK: jmp
+; CHECK: %exit
+entry:
+ %strs = alloca [1000 x [1001 x i8]], align 16
+ br label %for.cond
+
+for.cond:
+ %j.0 = phi i32 [ 0, %entry ], [ %inc10, %for.inc9 ]
+ %cmp = icmp slt i32 %j.0, 1000
+ br i1 %cmp, label %for.cond1, label %for.end11
+
+for.cond1:
+ %indvars.iv50 = phi i64 [ %indvars.iv.next51, %for.body3 ], [ 0, %for.cond ]
+ %0 = trunc i64 %indvars.iv50 to i32
+ %cmp2 = icmp slt i32 %0, 1000
+ br i1 %cmp2, label %for.body3, label %for.inc9
+
+for.body3:
+ %arraydecay = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 0
+ %call = call i8* @memchr(i8* %arraydecay, i32 120, i64 1000)
+ %add.ptr = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 %indvars.iv50
+ %cmp7 = icmp eq i8* %call, %add.ptr
+ %indvars.iv.next51 = add i64 %indvars.iv50, 1
+ br i1 %cmp7, label %for.cond1, label %if.then
+
+if.then:
+ %puts = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0))
+ call void @exit(i32 1) noreturn
+ unreachable
+
+for.inc9:
+ %inc10 = add nsw i32 %j.0, 1
+ br label %for.cond
+
+for.end11:
+ %puts42 = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0))
+ br label %for.cond14
+
+for.cond14:
+ %j13.0 = phi i32 [ 0, %for.end11 ], [ %inc39, %for.inc38 ]
+ %cmp15 = icmp slt i32 %j13.0, 1000
+ br i1 %cmp15, label %for.cond18, label %for.end40
+
+for.cond18:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %exit ], [ 0, %for.cond14 ]
+ %1 = trunc i64 %indvars.iv to i32
+ %cmp19 = icmp slt i32 %1, 1000
+ br i1 %cmp19, label %for.body20, label %for.inc38
+
+for.body20:
+ %arraydecay24 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 0
+ br label %do.body.i
+
+do.body.i:
+ %n.addr.0.i = phi i64 [ %dec.i, %do.cond.i ], [ 1000, %for.body20 ]
+ %p.0.i = phi i8* [ %incdec.ptr.i, %do.cond.i ], [ %arraydecay24, %for.body20 ]
+ %2 = load i8* %p.0.i, align 1
+ %cmp3.i = icmp eq i8 %2, 120
+ br i1 %cmp3.i, label %exit, label %do.cond.i
+
+do.cond.i:
+ %incdec.ptr.i = getelementptr inbounds i8* %p.0.i, i64 1
+ %dec.i = add i64 %n.addr.0.i, -1
+ %cmp5.i = icmp eq i64 %dec.i, 0
+ br i1 %cmp5.i, label %if.then32, label %do.body.i
+
+exit:
+ %add.ptr30 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 %indvars.iv
+ %cmp31 = icmp eq i8* %p.0.i, %add.ptr30
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ br i1 %cmp31, label %for.cond18, label %if.then32
+
+if.then32:
+ %puts43 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0))
+ call void @exit(i32 1) noreturn
+ unreachable
+
+for.inc38:
+ %inc39 = add nsw i32 %j13.0, 1
+ br label %for.cond14
+
+for.end40:
+ %puts44 = call i32 @puts(i8* getelementptr inbounds ([11 x i8]* @.str3, i64 0, i64 0))
+ ret i32 0
+}
+
+declare i8* @memchr(i8*, i32, i64) nounwind readonly
+declare void @exit(i32) noreturn
+declare i32 @puts(i8* nocapture) nounwind
+
diff --git a/test/CodeGen/X86/rotate.ll b/test/CodeGen/X86/rotate.ll
index 1e20273194d5..117300110b41 100644
--- a/test/CodeGen/X86/rotate.ll
+++ b/test/CodeGen/X86/rotate.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep {ro\[rl\]} | count 12
+; RUN: grep "ro[rl]" | count 12
define i32 @rotl32(i32 %A, i8 %Amt) {
%shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
index 0dd74ea0791e..51fcf6418429 100644
--- a/test/CodeGen/X86/rounding-ops.ll
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
define float @test1(float %x) nounwind {
%call = tail call float @floorf(float %x) nounwind readnone
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
index 5ce08aa51c76..d68b00b69a2b 100644
--- a/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -51,14 +51,14 @@ false:
; X64-NEXT: callq __morestack
; X64-NEXT: ret
-; X64: movq %rsp, %rdi
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: cmpq %rdi, %fs:112
+; X64: movq %rsp, %[[RDI:rdi|rax]]
+; X64-NEXT: subq %{{.*}}, %[[RDI]]
+; X64-NEXT: cmpq %[[RDI]], %fs:112
-; X64: movq %rdi, %rsp
+; X64: movq %[[RDI]], %rsp
-; X64: movq %rax, %rdi
+; X64: movq %{{.*}}, %rdi
; X64-NEXT: callq __morestack_allocate_stack_space
-; X64-NEXT: movq %rax, %rdi
+; X64: movq %rax, %rdi
}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index f465a4ffc584..2e39473057b1 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=atom | FileCheck -check-prefix=ATOM %s
; PR5757
%0 = type { i64, i32 }
@@ -12,6 +13,10 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
; CHECK: test1:
; CHECK: cmovneq %rdi, %rsi
; CHECK: movl (%rsi), %eax
+
+; ATOM: test1:
+; ATOM: cmovneq %rdi, %rsi
+; ATOM: movl (%rsi), %eax
}
@@ -31,6 +36,10 @@ bb91: ; preds = %bb84
; CHECK: test2:
; CHECK: movnew
; CHECK: movswl
+
+; ATOM: test2:
+; ATOM: movnew
+; ATOM: movswl
}
declare i1 @return_false()
@@ -44,6 +53,9 @@ entry:
ret float %iftmp.0.0
; CHECK: test3:
; CHECK: movss {{.*}},4), %xmm0
+
+; ATOM: test3:
+; ATOM: movss {{.*}},4), %xmm0
}
define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
@@ -55,6 +67,9 @@ entry:
ret i8 %2
; CHECK: test4:
; CHECK: movsbl ({{.*}},4), %eax
+
+; ATOM: test4:
+; ATOM: movsbl ({{.*}},4), %eax
}
define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
@@ -62,6 +77,8 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
store <2 x i16> %x, <2 x i16>* %p
ret void
; CHECK: test5:
+
+; ATOM: test5:
}
define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
@@ -79,6 +96,12 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK: ret
; CHECK: mulps
; CHECK: ret
+
+; ATOM: test6:
+; ATOM: je
+; ATOM: ret
+; ATOM: mulps
+; ATOM: ret
}
; Select with fp80's
@@ -89,6 +112,10 @@ define x86_fp80 @test7(i32 %tmp8) nounwind {
; CHECK: test7:
; CHECK: leaq
; CHECK: fldt (%r{{.}}x,%r{{.}}x)
+
+; ATOM: test7:
+; ATOM: leaq
+; ATOM: fldt (%r{{.}}x,%r{{.}}x)
}
; widening select v6i32 and then a sub
@@ -97,8 +124,10 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
store <6 x i32> %val, <6 x i32>* %dst.addr
ret void
-
+
; CHECK: test8:
+
+; ATOM: test8:
}
@@ -113,6 +142,12 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK: sbbq %rax, %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
+
+; ATOM: test9:
+; ATOM: cmpq $1, %rdi
+; ATOM: sbbq %rax, %rax
+; ATOM: orq %rsi, %rax
+; ATOM: ret
}
;; Same as test9
@@ -125,6 +160,12 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK: sbbq %rax, %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
+
+; ATOM: test9a:
+; ATOM: cmpq $1, %rdi
+; ATOM: sbbq %rax, %rax
+; ATOM: orq %rsi, %rax
+; ATOM: ret
}
define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
@@ -137,6 +178,12 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK: sbbq %rax, %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
+
+; ATOM: test9b:
+; ATOM: cmpq $1, %rdi
+; ATOM: sbbq %rax, %rax
+; ATOM: orq %rsi, %rax
+; ATOM: ret
}
;; Select between -1 and 1.
@@ -149,6 +196,12 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK: sbbq %rax, %rax
; CHECK: orq $1, %rax
; CHECK: ret
+
+; ATOM: test10:
+; ATOM: cmpq $1, %rdi
+; ATOM: sbbq %rax, %rax
+; ATOM: orq $1, %rax
+; ATOM: ret
}
@@ -163,6 +216,13 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK: notq %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
+
+; ATOM: test11:
+; ATOM: cmpq $1, %rdi
+; ATOM: sbbq %rax, %rax
+; ATOM: notq %rax
+; ATOM: orq %rsi, %rax
+; ATOM: ret
}
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
@@ -175,6 +235,13 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK: notq %rax
; CHECK: orq %rsi, %rax
; CHECK: ret
+
+; ATOM: test11a:
+; ATOM: cmpq $1, %rdi
+; ATOM: sbbq %rax, %rax
+; ATOM: notq %rax
+; ATOM: orq %rsi, %rax
+; ATOM: ret
}
@@ -189,10 +256,16 @@ entry:
%call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
ret i8* %call
; CHECK: test12:
-; CHECK: mulq
; CHECK: movq $-1, %rdi
+; CHECK: mulq
; CHECK: cmovnoq %rax, %rdi
; CHECK: jmp __Znam
+
+; ATOM: test12:
+; ATOM: mulq
+; ATOM: movq $-1, %rdi
+; ATOM: cmovnoq %rax, %rdi
+; ATOM: jmp __Znam
}
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
@@ -205,6 +278,11 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
; CHECK: cmpl
; CHECK-NEXT: sbbl
; CHECK-NEXT: ret
+
+; ATOM: test13:
+; ATOM: cmpl
+; ATOM-NEXT: sbbl
+; ATOM-NEXT: ret
}
define i32 @test14(i32 %a, i32 %b) nounwind {
@@ -216,5 +294,53 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
; CHECK-NEXT: sbbl
; CHECK-NEXT: notl
; CHECK-NEXT: ret
+
+; ATOM: test14:
+; ATOM: cmpl
+; ATOM-NEXT: sbbl
+; ATOM-NEXT: notl
+; ATOM-NEXT: ret
+}
+
+; rdar://10961709
+define i32 @test15(i32 %x) nounwind { ; returns -1 when %x is non-zero, 0 otherwise
+entry:
+ %cmp = icmp ne i32 %x, 0
+ %sub = sext i1 %cmp to i32 ; sign-extending the i1 maps true to -1 and false to 0
+ ret i32 %sub ; both run lines expect a 32-bit neg followed by sbb (directives below)
+; CHECK: test15:
+; CHECK: negl
+; CHECK: sbbl
+
+; ATOM: test15:
+; ATOM: negl
+; ATOM: sbbl
}
+define i64 @test16(i64 %x) nounwind uwtable readnone ssp { ; 64-bit variant of test15: -1 when %x is non-zero, 0 otherwise
+entry:
+ %cmp = icmp ne i64 %x, 0
+ %conv1 = sext i1 %cmp to i64 ; sign-extending the i1 maps true to -1 and false to 0
+ ret i64 %conv1 ; both run lines expect a 64-bit neg followed by sbb (directives below)
+; CHECK: test16:
+; CHECK: negq
+; CHECK: sbbq
+
+; ATOM: test16:
+; ATOM: negq
+; ATOM: sbbq
+}
+
+define i16 @test17(i16 %x) nounwind { ; 16-bit variant of test15: -1 when %x is non-zero, 0 otherwise
+entry:
+ %cmp = icmp ne i16 %x, 0
+ %sub = sext i1 %cmp to i16 ; sign-extending the i1 maps true to -1 and false to 0
+ ret i16 %sub ; both run lines expect a 16-bit neg followed by sbb (directives below)
+; CHECK: test17:
+; CHECK: negw
+; CHECK: sbbw
+
+; ATOM: test17:
+; ATOM: negw
+; ATOM: sbbw
+}
diff --git a/test/CodeGen/X86/selectiondag-cse.ll b/test/CodeGen/X86/selectiondag-cse.ll
new file mode 100644
index 000000000000..a653a1c8ca3b
--- /dev/null
+++ b/test/CodeGen/X86/selectiondag-cse.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s
+; PR12599
+;
+; This bitcode causes the X86 target to make changes to the DAG during
+; selection in MatchAddressRecursively. The edit triggers CSE which causes both
+; the current node and yet-to-be-selected nodes to be deleted.
+;
+; SelectionDAGISel::DoInstructionSelection must handle that.
+;
+target triple = "x86_64-apple-macosx"
+
+%0 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %1*, %2*, %9*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %10*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
+%1 = type { i32, i32, [100 x %2*], i32, float, float, float }
+%2 = type { i32, i32, i32, i32, i32, i32, %3*, %6*, %8*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
+%3 = type { %4*, %5, %5 }
+%4 = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
+%5 = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
+%6 = type { [3 x [11 x %7]], [2 x [9 x %7]], [2 x [10 x %7]], [2 x [6 x %7]], [4 x %7], [4 x %7], [3 x %7] }
+%7 = type { i16, i8, i64 }
+%8 = type { [2 x %7], [4 x %7], [3 x [4 x %7]], [10 x [4 x %7]], [10 x [15 x %7]], [10 x [15 x %7]], [10 x [5 x %7]], [10 x [5 x %7]], [10 x [15 x %7]], [10 x [15 x %7]] }
+%9 = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %9*, %9*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%10 = type { i32, i32, i32, i32, i32, %10* }
+
+@images = external hidden global %0, align 8
+
+define hidden fastcc void @Mode_Decision_for_4x4IntraBlocks() nounwind uwtable ssp { ; compile-only reproducer: the RUN line pipes nothing into FileCheck
+bb4:
+ %tmp = or i208 undef, 0 ; reduced input: the 208-bit source value is undef
+ br i1 undef, label %bb35, label %bb5
+
+bb5:
+ %tmp6 = add i32 0, 2
+ %tmp7 = lshr i208 %tmp, 80
+ %tmp8 = trunc i208 %tmp7 to i32
+ %tmp9 = and i32 %tmp8, 65535 ; bits 80..95 of %tmp
+ %tmp10 = shl nuw nsw i32 %tmp9, 1 ; mask first, then shift left by one
+ %tmp11 = add i32 0, 2
+ %tmp12 = add i32 %tmp11, 0
+ %tmp13 = add i32 %tmp12, %tmp10
+ %tmp14 = lshr i32 %tmp13, 2
+ %tmp15 = trunc i32 %tmp14 to i16
+ store i16 %tmp15, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 3, i64 0, i64 3), align 2
+ %tmp16 = lshr i208 %tmp, 96
+ %tmp17 = trunc i208 %tmp16 to i32
+ %tmp18 = and i32 %tmp17, 65535 ; bits 96..111 of %tmp
+ %tmp19 = add i32 %tmp18, 2
+ %tmp20 = add i32 %tmp19, 0
+ %tmp21 = add i32 %tmp20, 0
+ %tmp22 = lshr i32 %tmp21, 2
+ %tmp23 = trunc i32 %tmp22 to i16
+ store i16 %tmp23, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 3, i64 2, i64 3), align 2
+ %tmp24 = add i32 %tmp6, %tmp9
+ %tmp25 = add i32 %tmp24, 0
+ %tmp26 = lshr i32 %tmp25, 2
+ %tmp27 = trunc i32 %tmp26 to i16
+ store i16 %tmp27, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 7, i64 1, i64 2), align 4
+ %tmp28 = lshr i208 %tmp, 80 ; same shift as %tmp7
+ %tmp29 = shl nuw nsw i208 %tmp28, 1 ; here the shift left happens before the mask
+ %tmp30 = trunc i208 %tmp29 to i32
+ %tmp31 = and i32 %tmp30, 131070 ; 131070 = 65535 << 1, so this selects the same bits as %tmp10
+ %tmp32 = add i32 %tmp12, %tmp31 ; same sum as %tmp13 by value; presumably the redundancy behind the CSE described in the file header (TODO confirm)
+ %tmp33 = lshr i32 %tmp32, 2
+ %tmp34 = trunc i32 %tmp33 to i16
+ store i16 %tmp34, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 7, i64 1, i64 3), align 2
+ br label %bb35
+
+bb35: ; preds = %bb5, %bb4
+ unreachable
+}
diff --git a/test/CodeGen/X86/sext-setcc-self.ll b/test/CodeGen/X86/sext-setcc-self.ll
new file mode 100644
index 000000000000..23d66a24724d
--- /dev/null
+++ b/test/CodeGen/X86/sext-setcc-self.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=x86-64 -mcpu=nehalem -asm-verbose=false < %s | FileCheck %s
+
+define <4 x i32> @test_ueq(<4 x float> %in) { ; self-compare with the "unordered or equal" predicate
+entry:
+ ; CHECK: pcmpeqd %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp ueq <4 x float> %in, %in ; true in every lane: a lane equals itself, and a NaN lane is unordered
+ %1 = sext <4 x i1> %0 to <4 x i32> ; true lanes become -1, so the result is all-ones (hence the expected pcmpeqd of a register with itself)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_uge(<4 x float> %in) { ; self-compare with the "unordered or greater-or-equal" predicate
+entry:
+ ; CHECK: pcmpeqd %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp uge <4 x float> %in, %in ; true in every lane: a lane is >= itself, and a NaN lane is unordered
+ %1 = sext <4 x i1> %0 to <4 x i32> ; true lanes become -1, so the result is all-ones (hence the expected pcmpeqd of a register with itself)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ule(<4 x float> %in) { ; self-compare with the "unordered or less-or-equal" predicate
+entry:
+ ; CHECK: pcmpeqd %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp ule <4 x float> %in, %in ; true in every lane: a lane is <= itself, and a NaN lane is unordered
+ %1 = sext <4 x i1> %0 to <4 x i32> ; true lanes become -1, so the result is all-ones (hence the expected pcmpeqd of a register with itself)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_one(<4 x float> %in) { ; self-compare with the "ordered and not equal" predicate
+entry:
+ ; CHECK: xorps %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp one <4 x float> %in, %in ; false in every lane: a lane is never unequal to itself, and a NaN lane is not ordered
+ %1 = sext <4 x i1> %0 to <4 x i32> ; false lanes become 0, so the result is all-zero (hence the expected xorps of a register with itself)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ogt(<4 x float> %in) { ; self-compare with the "ordered and greater-than" predicate
+entry:
+ ; CHECK: xorps %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp ogt <4 x float> %in, %in ; false in every lane: a lane is never greater than itself, and a NaN lane is not ordered
+ %1 = sext <4 x i1> %0 to <4 x i32> ; false lanes become 0, so the result is all-zero (hence the expected xorps of a register with itself)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_olt(<4 x float> %in) { ; self-compare with the "ordered and less-than" predicate
+entry:
+ ; CHECK: xorps %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp olt <4 x float> %in, %in ; false in every lane: a lane is never less than itself, and a NaN lane is not ordered
+ %1 = sext <4 x i1> %0 to <4 x i32> ; false lanes become 0, so the result is all-zero (hence the expected xorps of a register with itself)
+ ret <4 x i32> %1
+}
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index b747cc5580ca..1de915164f0c 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,13 +1,27 @@
-; RUN: llc < %s -march=x86 | grep and | count 2
-; RUN: llc < %s -march=x86-64 | not grep and
+; RUN: llc < %s -mtriple=i386-apple-macosx | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=X64
define i32 @t1(i32 %t, i32 %val) nounwind {
+; X32: t1:
+; X32-NOT: andl
+; X32: shll
+
+; X64: t1:
+; X64-NOT: andl
+; X64: shll
%shamt = and i32 %t, 31
%res = shl i32 %val, %shamt
ret i32 %res
}
define i32 @t2(i32 %t, i32 %val) nounwind {
+; X32: t2:
+; X32-NOT: andl
+; X32: shll
+
+; X64: t2:
+; X64-NOT: andl
+; X64: shll
%shamt = and i32 %t, 63
%res = shl i32 %val, %shamt
ret i32 %res
@@ -16,6 +30,13 @@ define i32 @t2(i32 %t, i32 %val) nounwind {
@X = internal global i16 0
define void @t3(i16 %t) nounwind {
+; X32: t3:
+; X32-NOT: andl
+; X32: sarw
+
+; X64: t3:
+; X64-NOT: andl
+; X64: sarw
%shamt = and i16 %t, 31
%tmp = load i16* @X
%tmp1 = ashr i16 %tmp, %shamt
@@ -24,13 +45,34 @@ define void @t3(i16 %t) nounwind {
}
define i64 @t4(i64 %t, i64 %val) nounwind {
+; X64: t4:
+; X64-NOT: and
+; X64: shrq
%shamt = and i64 %t, 63
%res = lshr i64 %val, %shamt
ret i64 %res
}
define i64 @t5(i64 %t, i64 %val) nounwind {
+; X64: t5:
+; X64-NOT: and
+; X64: shrq
%shamt = and i64 %t, 191
%res = lshr i64 %val, %shamt
ret i64 %res
}
+
+
+; rdar://11866926
+define i64 @t6(i64 %key, i64* nocapture %val) nounwind { ; (load(%val) + (2^61 - 1)) & (%key >> 3)
+entry:
+; X64: t6:
+; X64-NOT: movabsq
+; X64: decq
+; X64: andq
+ %shr = lshr i64 %key, 3 ; logical shift: the top three bits of %shr are zero
+ %0 = load i64* %val, align 8
+ %sub = add i64 %0, 2305843009213693951 ; the constant is 2^61 - 1, too wide for a 32-bit immediate
+ %and = and i64 %sub, %shr ; only the low 61 bits of %sub survive, and modulo 2^61 adding 2^61 - 1 equals subtracting 1, so a decrement suffices and no 64-bit immediate load is expected
+ ret i64 %and
+}
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
index d38f9a88fcd6..4f27e97fb390 100644
--- a/test/CodeGen/X86/shift-coalesce.ll
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep {shld.*CL}
+; RUN: grep "shld.*CL"
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: not grep {mov CL, BL}
+; RUN: not grep "mov CL, BL"
; PR687
diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll
index 5adee7c76941..8d2b2907c5a7 100644
--- a/test/CodeGen/X86/shift-double.ll
+++ b/test/CodeGen/X86/shift-double.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep {sh\[lr\]d} | count 5
+; RUN: grep "sh[lr]d" | count 5
define i64 @test1(i64 %X, i8 %C) {
%shift.upgrd.1 = zext i8 %C to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index 3ea601147bb0..c518cdd3aa4e 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -verify-coalescing | FileCheck %s
define i32* @test1(i32* %P, i32 %X) {
; CHECK: test1:
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
index 0827221875b1..83e1eb5c39e7 100644
--- a/test/CodeGen/X86/shl_elim.ll
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | grep {movl 8(.esp), %eax}
-; RUN: llc < %s -march=x86 | grep {shrl .eax}
-; RUN: llc < %s -march=x86 | grep {movswl .ax, .eax}
+; RUN: llc < %s -march=x86 | grep "movl 8(.esp), %eax"
+; RUN: llc < %s -march=x86 | grep "shrl .eax"
+; RUN: llc < %s -march=x86 | grep "movswl .ax, .eax"
define i32 @test1(i64 %a) nounwind {
%tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll
index 13f932982f14..1479be1f56ba 100644
--- a/test/CodeGen/X86/sincos.ll
+++ b/test/CodeGen/X86/sincos.ll
@@ -1,8 +1,6 @@
; Make sure this testcase codegens to the sin and cos instructions, not calls
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
-; RUN: grep sin\$ | count 3
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
-; RUN: grep cos\$ | count 3
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=SIN
+; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=COS
declare float @sinf(float) readonly
@@ -10,39 +8,59 @@ declare double @sin(double) readonly
declare x86_fp80 @sinl(x86_fp80) readonly
+; SIN: test1:
define float @test1(float %X) {
%Y = call float @sinf(float %X) readonly
ret float %Y
}
+; SIN: {{^[ \t]*fsin$}}
+; SIN-NOT: fsin
+
+; SIN: test2:
define double @test2(double %X) {
%Y = call double @sin(double %X) readonly
ret double %Y
}
+; SIN: {{^[ \t]*fsin$}}
+
+; SIN-NOT: fsin
+; SIN: test3:
define x86_fp80 @test3(x86_fp80 %X) {
%Y = call x86_fp80 @sinl(x86_fp80 %X) readonly
ret x86_fp80 %Y
}
+; SIN: {{^[ \t]*fsin$}}
+; SIN-NOT: fsin
+; COS-NOT: fcos
declare float @cosf(float) readonly
declare double @cos(double) readonly
declare x86_fp80 @cosl(x86_fp80) readonly
+
+; SIN: test4:
+; COS: test3:
define float @test4(float %X) {
%Y = call float @cosf(float %X) readonly
ret float %Y
}
+; COS: {{^[ \t]*fcos}}
define double @test5(double %X) {
%Y = call double @cos(double %X) readonly
ret double %Y
}
+; COS: {{^[ \t]*fcos}}
define x86_fp80 @test6(x86_fp80 %X) {
%Y = call x86_fp80 @cosl(x86_fp80 %X) readonly
ret x86_fp80 %Y
}
+; COS: {{^[ \t]*fcos}}
+; SIN-NOT: fsin
+; COS-NOT: fcos
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 7957eb849673..649cd61ab78c 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true | FileCheck %s
; Currently, floating-point selects are lowered to CFG triangles.
; This means that one side of the select is always unconditionally
diff --git a/test/CodeGen/X86/sink-out-of-loop.ll b/test/CodeGen/X86/sink-out-of-loop.ll
new file mode 100644
index 000000000000..c600f925a32b
--- /dev/null
+++ b/test/CodeGen/X86/sink-out-of-loop.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+; A MOV32ri is inside a loop, it has two successors, one successor is inside the
+; same loop, the other successor is outside the loop. We should be able to sink
+; MOV32ri outside the loop.
+; rdar://11980766
+define i32 @sink_succ(i32 %argc, i8** nocapture %argv) nounwind uwtable ssp { ; three-deep loop nest followed by a separate loop; always returns 0
+; CHECK: sink_succ
+; CHECK: [[OUTER_LN1:LBB0_[0-9]+]]: ## %preheader
+; CHECK: %exit
+; CHECK-NOT: movl
+; CHECK: jne [[OUTER_LN1]]
+; CHECK: movl
+; CHECK: [[LN2:LBB0_[0-9]+]]: ## %for.body2
+; CHECK: jne [[LN2]]
+; CHECK: ret
+entry:
+ br label %preheader
+
+preheader: ; header of the outermost loop; %exit branches back here
+ %i.127 = phi i32 [ 0, %entry ], [ %inc9, %exit ]
+ br label %for.body1.lr
+
+for.body1.lr: ; header of the middle loop; %iv30 starts at 1
+ %iv30 = phi i32 [ 1, %preheader ], [ %iv.next31, %for.inc40.i ]
+ br label %for.body1
+
+for.body1: ; innermost loop: single block that branches back to itself
+ %iv.i = phi i64 [ 0, %for.body1.lr ], [ %iv.next.i, %for.body1 ]
+ %iv.next.i = add i64 %iv.i, 1
+ %lftr.wideiv32 = trunc i64 %iv.next.i to i32
+ %exitcond33 = icmp eq i32 %lftr.wideiv32, %iv30 ; leave once the truncated counter reaches %iv30
+ br i1 %exitcond33, label %for.inc40.i, label %for.body1
+
+for.inc40.i: ; latch of the middle loop
+ %iv.next31 = add i32 %iv30, 1
+ %exitcond49.i = icmp eq i32 %iv.next31, 32 ; middle loop ends when the incremented %iv30 equals 32
+ br i1 %exitcond49.i, label %exit, label %for.body1.lr
+
+exit: ; latch of the outermost loop; one successor stays in the loop, the other leaves it
+ %inc9 = add nsw i32 %i.127, 1
+ %exitcond34 = icmp eq i32 %inc9, 10 ; outermost loop ends when %inc9 equals 10
+ br i1 %exitcond34, label %for.body2, label %preheader
+
+for.body2: ; separate single-block loop entered after the nest
+ %iv = phi i64 [ %iv.next, %for.body2 ], [ 0, %exit ]
+ %iv.next = add i64 %iv, 1
+ %lftr.wideiv = trunc i64 %iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 2048 ; ends when the truncated counter equals 2048
+ br i1 %exitcond, label %for.end20, label %for.body2
+
+for.end20:
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
index 81a072fb396a..980f18c8b911 100644
--- a/test/CodeGen/X86/splat-scalar-load.ll
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -mcpu=nehalem | FileCheck %s
; rdar://7434544
define <2 x i64> @t2() nounwind {
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 118e393b7baa..71a42f4db34a 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem | FileCheck %s
; CHECK: a:
; CHECK: movdqu
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
index d1e07c856364..c99287bdfb9f 100644
--- a/test/CodeGen/X86/sse-domains.ll
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mcpu=nehalem | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7"
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 11124409f058..3839e875615f 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -promote-elements | FileCheck %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-unsafe-fp-math -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=UNSAFE %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -join-physregs -enable-no-nans-fp-math -promote-elements | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
; Some of these patterns can be matched as SSE min or max. Some of
; then can be matched provided that the operands are swapped.
@@ -8,13 +8,10 @@
; and a conditional branch.
; The naming convention is {,x_,y_}{o,u}{gt,lt,ge,le}{,_inverse}
-; x_ : use 0.0 instead of %y
-; y_ : use -0.0 instead of %y
+; _x: use 0.0 instead of %y
+; _y: use -0.0 instead of %y
; _inverse : swap the arms of the select.
-; Some of these tests depend on -join-physregs commuting instructions to
-; eliminate copies.
-
; CHECK: ogt:
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
@@ -139,147 +136,147 @@ define double @ole_inverse(double %x, double %y) nounwind {
ret double %d
}
-; CHECK: x_ogt:
+; CHECK: ogt_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_ogt:
+; UNSAFE: ogt_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ogt:
+; FINITE: ogt_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ogt(double %x) nounwind {
+define double @ogt_x(double %x) nounwind {
%c = fcmp ogt double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_olt:
+; CHECK: olt_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_olt:
+; UNSAFE: olt_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_olt:
+; FINITE: olt_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_olt(double %x) nounwind {
+define double @olt_x(double %x) nounwind {
%c = fcmp olt double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_ogt_inverse:
+; CHECK: ogt_inverse_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_ogt_inverse:
+; UNSAFE: ogt_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ogt_inverse:
+; FINITE: ogt_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ogt_inverse(double %x) nounwind {
+define double @ogt_inverse_x(double %x) nounwind {
%c = fcmp ogt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
-; CHECK: x_olt_inverse:
+; CHECK: olt_inverse_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_olt_inverse:
+; UNSAFE: olt_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_olt_inverse:
+; FINITE: olt_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_olt_inverse(double %x) nounwind {
+define double @olt_inverse_x(double %x) nounwind {
%c = fcmp olt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
-; CHECK: x_oge:
+; CHECK: oge_x:
; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: x_oge:
+; UNSAFE: oge_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_oge:
+; FINITE: oge_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_oge(double %x) nounwind {
+define double @oge_x(double %x) nounwind {
%c = fcmp oge double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_ole:
+; CHECK: ole_x:
; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: x_ole:
+; UNSAFE: ole_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ole:
+; FINITE: ole_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ole(double %x) nounwind {
+define double @ole_x(double %x) nounwind {
%c = fcmp ole double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_oge_inverse:
-; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: x_oge_inverse:
+; CHECK: oge_inverse_x:
+; CHECK: ucomisd %xmm
+; UNSAFE: oge_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_oge_inverse:
+; FINITE: oge_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_oge_inverse(double %x) nounwind {
+define double @oge_inverse_x(double %x) nounwind {
%c = fcmp oge double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
-; CHECK: x_ole_inverse:
-; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: x_ole_inverse:
+; CHECK: ole_inverse_x:
+; CHECK: ucomisd %xmm
+; UNSAFE: ole_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ole_inverse:
+; FINITE: ole_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ole_inverse(double %x) nounwind {
+define double @ole_inverse_x(double %x) nounwind {
%c = fcmp ole double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -411,419 +408,419 @@ define double @ule_inverse(double %x, double %y) nounwind {
ret double %d
}
-; CHECK: x_ugt:
+; CHECK: ugt_x:
; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: x_ugt:
+; UNSAFE: ugt_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ugt:
+; FINITE: ugt_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ugt(double %x) nounwind {
+define double @ugt_x(double %x) nounwind {
%c = fcmp ugt double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_ult:
+; CHECK: ult_x:
; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: x_ult:
+; UNSAFE: ult_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ult:
+; FINITE: ult_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ult(double %x) nounwind {
+define double @ult_x(double %x) nounwind {
%c = fcmp ult double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_ugt_inverse:
-; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: x_ugt_inverse:
+; CHECK: ugt_inverse_x:
+; CHECK: ucomisd %xmm
+; UNSAFE: ugt_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ugt_inverse:
+; FINITE: ugt_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ugt_inverse(double %x) nounwind {
+define double @ugt_inverse_x(double %x) nounwind {
%c = fcmp ugt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
-; CHECK: x_ult_inverse:
-; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: x_ult_inverse:
+; CHECK: ult_inverse_x:
+; CHECK: ucomisd %xmm
+; UNSAFE: ult_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ult_inverse:
+; FINITE: ult_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ult_inverse(double %x) nounwind {
+define double @ult_inverse_x(double %x) nounwind {
%c = fcmp ult double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
-; CHECK: x_uge:
+; CHECK: uge_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_uge:
+; UNSAFE: uge_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_uge:
+; FINITE: uge_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_uge(double %x) nounwind {
+define double @uge_x(double %x) nounwind {
%c = fcmp uge double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_ule:
+; CHECK: ule_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_ule:
+; UNSAFE: ule_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ule:
+; FINITE: ule_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ule(double %x) nounwind {
+define double @ule_x(double %x) nounwind {
%c = fcmp ule double %x, 0.000000e+00
%d = select i1 %c, double %x, double 0.000000e+00
ret double %d
}
-; CHECK: x_uge_inverse:
+; CHECK: uge_inverse_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_uge_inverse:
+; UNSAFE: uge_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_uge_inverse:
+; FINITE: uge_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_uge_inverse(double %x) nounwind {
+define double @uge_inverse_x(double %x) nounwind {
%c = fcmp uge double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
-; CHECK: x_ule_inverse:
+; CHECK: ule_inverse_x:
; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: x_ule_inverse:
+; UNSAFE: ule_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: x_ule_inverse:
+; FINITE: ule_inverse_x:
; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @x_ule_inverse(double %x) nounwind {
+define double @ule_inverse_x(double %x) nounwind {
%c = fcmp ule double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
-; CHECK: y_ogt:
+; CHECK: ogt_y:
; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_ogt:
+; UNSAFE: ogt_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ogt:
+; FINITE: ogt_y:
; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_ogt(double %x) nounwind {
+define double @ogt_y(double %x) nounwind {
%c = fcmp ogt double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_olt:
+; CHECK: olt_y:
; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_olt:
+; UNSAFE: olt_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_olt:
+; FINITE: olt_y:
; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_olt(double %x) nounwind {
+define double @olt_y(double %x) nounwind {
%c = fcmp olt double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_ogt_inverse:
+; CHECK: ogt_inverse_y:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_ogt_inverse:
+; UNSAFE: ogt_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ogt_inverse:
+; FINITE: ogt_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_ogt_inverse(double %x) nounwind {
+define double @ogt_inverse_y(double %x) nounwind {
%c = fcmp ogt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
-; CHECK: y_olt_inverse:
+; CHECK: olt_inverse_y:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_olt_inverse:
+; UNSAFE: olt_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_olt_inverse:
+; FINITE: olt_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_olt_inverse(double %x) nounwind {
+define double @olt_inverse_y(double %x) nounwind {
%c = fcmp olt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
-; CHECK: y_oge:
+; CHECK: oge_y:
; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: y_oge:
+; UNSAFE: oge_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_oge:
+; FINITE: oge_y:
; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_oge(double %x) nounwind {
+define double @oge_y(double %x) nounwind {
%c = fcmp oge double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_ole:
+; CHECK: ole_y:
; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: y_ole:
+; UNSAFE: ole_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ole:
+; FINITE: ole_y:
; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_ole(double %x) nounwind {
+define double @ole_y(double %x) nounwind {
%c = fcmp ole double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_oge_inverse:
-; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: y_oge_inverse:
+; CHECK: oge_inverse_y:
+; CHECK: ucomisd %xmm
+; UNSAFE: oge_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_oge_inverse:
+; FINITE: oge_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_oge_inverse(double %x) nounwind {
+define double @oge_inverse_y(double %x) nounwind {
%c = fcmp oge double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
-; CHECK: y_ole_inverse:
-; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: y_ole_inverse:
+; CHECK: ole_inverse_y:
+; CHECK: ucomisd %xmm
+; UNSAFE: ole_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ole_inverse:
+; FINITE: ole_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_ole_inverse(double %x) nounwind {
+define double @ole_inverse_y(double %x) nounwind {
%c = fcmp ole double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
-; CHECK: y_ugt:
+; CHECK: ugt_y:
; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: y_ugt:
+; UNSAFE: ugt_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ugt:
+; FINITE: ugt_y:
; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_ugt(double %x) nounwind {
+define double @ugt_y(double %x) nounwind {
%c = fcmp ugt double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_ult:
+; CHECK: ult_y:
; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: y_ult:
+; UNSAFE: ult_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ult:
+; FINITE: ult_y:
; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_ult(double %x) nounwind {
+define double @ult_y(double %x) nounwind {
%c = fcmp ult double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_ugt_inverse:
-; CHECK: ucomisd %xmm0, %xmm1
-; UNSAFE: y_ugt_inverse:
+; CHECK: ugt_inverse_y:
+; CHECK: ucomisd %xmm
+; UNSAFE: ugt_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ugt_inverse:
+; FINITE: ugt_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_ugt_inverse(double %x) nounwind {
+define double @ugt_inverse_y(double %x) nounwind {
%c = fcmp ugt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
-; CHECK: y_ult_inverse:
-; CHECK: ucomisd %xmm1, %xmm0
-; UNSAFE: y_ult_inverse:
+; CHECK: ult_inverse_y:
+; CHECK: ucomisd %xmm
+; UNSAFE: ult_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ult_inverse:
+; FINITE: ult_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_ult_inverse(double %x) nounwind {
+define double @ult_inverse_y(double %x) nounwind {
%c = fcmp ult double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
-; CHECK: y_uge:
+; CHECK: uge_y:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_uge:
+; UNSAFE: uge_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_uge:
+; FINITE: uge_y:
; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_uge(double %x) nounwind {
+define double @uge_y(double %x) nounwind {
%c = fcmp uge double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_ule:
+; CHECK: ule_y:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_ule:
+; UNSAFE: ule_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ule:
+; FINITE: ule_y:
; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
; FINITE-NEXT: ret
-define double @y_ule(double %x) nounwind {
+define double @ule_y(double %x) nounwind {
%c = fcmp ule double %x, -0.000000e+00
%d = select i1 %c, double %x, double -0.000000e+00
ret double %d
}
-; CHECK: y_uge_inverse:
+; CHECK: uge_inverse_y:
; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_uge_inverse:
+; UNSAFE: uge_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_uge_inverse:
+; FINITE: uge_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_uge_inverse(double %x) nounwind {
+define double @uge_inverse_y(double %x) nounwind {
%c = fcmp uge double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
-; CHECK: y_ule_inverse:
+; CHECK: ule_inverse_y:
; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
; CHECK-NEXT: ret
-; UNSAFE: y_ule_inverse:
+; UNSAFE: ule_inverse_y:
; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
-; FINITE: y_ule_inverse:
+; FINITE: ule_inverse_y:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
-define double @y_ule_inverse(double %x) nounwind {
+define double @ule_inverse_y(double %x) nounwind {
%c = fcmp ule double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 5ea1b4dff1c1..48638b3b696c 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -249,9 +249,10 @@ entry:
; X64: t16:
; X64: pextrw $8, %xmm0, %eax
; X64: pslldq $2, %xmm0
-; X64: movd %xmm0, %ecx
-; X64: pextrw $1, %xmm0, %edx
-; X64: pinsrw $0, %ecx, %xmm0
+; X64: pextrw $1, %xmm0, %ecx
+; X64: movzbl %cl, %ecx
+; X64: orl %eax, %ecx
+; X64: pinsrw $1, %ecx, %xmm0
; X64: ret
}
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
index 1a1017d2c176..a2a0debf9e95 100644
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
;CHECK: vsel_float
;CHECK: blendvps
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 54264b16aea0..c6f9f0c873af 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X64
@g16 = external global i16
diff --git a/test/CodeGen/X86/sse4a.ll b/test/CodeGen/X86/sse4a.ll
new file mode 100644
index 000000000000..076e21336492
--- /dev/null
+++ b/test/CodeGen/X86/sse4a.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4a | FileCheck %s
+
+define void @test1(i8* %p, <4 x float> %a) nounwind optsize ssp {
+; CHECK: test1:
+; CHECK: movntss
+ tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) nounwind
+ ret void
+}
+
+declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
+
+define void @test2(i8* %p, <2 x double> %a) nounwind optsize ssp {
+; CHECK: test2:
+; CHECK: movntsd
+ tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) nounwind
+ ret void
+}
+
+declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)
+
+define <2 x i64> @test3(<2 x i64> %x) nounwind uwtable ssp {
+; CHECK: test3:
+; CHECK: extrq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+ ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+
+define <2 x i64> @test4(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test4:
+; CHECK: extrq
+ %1 = bitcast <2 x i64> %y to <16 x i8>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
+ ret <2 x i64> %2
+}
+
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
+
+define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test5:
+; CHECK: insertq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6)
+ ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
+
+define <2 x i64> @test6(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
+; CHECK: test6:
+; CHECK: insertq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index a57fa588f054..fd8db3be1063 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic |& FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s
; CHECK: fail
; CHECK-NOT: fail
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index f6c13ec0adf7..0ddb2378ef2f 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -10,11 +10,11 @@ target triple = "i686-apple-darwin8"
define void @test({ double, double }* byval %z, double* %P) nounwind {
entry:
%tmp3 = load double* @G, align 16 ; <double> [#uses=1]
- %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
+ %tmp4 = tail call double @fabs( double %tmp3 ) readnone ; <double> [#uses=1]
store volatile double %tmp4, double* %P
%tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
%tmp1 = load volatile double* %tmp, align 8 ; <double> [#uses=1]
- %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1]
+ %tmp2 = tail call double @fabs( double %tmp1 ) readnone ; <double> [#uses=1]
; CHECK: andpd{{.*}}4(%esp), %xmm
%tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
store volatile double %tmp6, double* %P, align 8
diff --git a/test/CodeGen/X86/stack-protector-linux.ll b/test/CodeGen/X86/stack-protector.ll
index fe2a9c5d57a1..c07511443bce 100644
--- a/test/CodeGen/X86/stack-protector-linux.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs:
; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs:
; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs:
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_guard}
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_fail}
+; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_guard"
+; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_fail"
@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 8313166a90cc..6e4fe90053f1 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
target datalayout = "e-p:32:32"
%struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
index a297728aee89..4f31ab5a9229 100644
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {leal .*), %e.\*} | count 1
+; RUN: llc < %s -march=x86-64 | grep "leal .*), %e.*" | count 1
; Don't eliminate or coalesce away the explicit zero-extension!
; This is currently using an leal because of a 3-addressification detail,
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll
index 0ea5541c89dc..0693789fe5d5 100644
--- a/test/CodeGen/X86/subreg-to-reg-4.ll
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -5,7 +5,7 @@
; RUN: not grep negq %t
; RUN: not grep addq %t
; RUN: not grep subq %t
-; RUN: not grep {movl %} %t
+; RUN: not grep "movl %" %t
; Utilize implicit zero-extension on x86-64 to eliminate explicit
; zero-extensions. Shrink 64-bit adds to 32-bit when the high
diff --git a/test/CodeGen/X86/switch-order-weight.ll b/test/CodeGen/X86/switch-order-weight.ll
new file mode 100644
index 000000000000..0fdd56d4e1d3
--- /dev/null
+++ b/test/CodeGen/X86/switch-order-weight.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=x86_64-apple-darwin11 < %s | FileCheck %s
+
+; Check that the cases which lead to unreachable are checked after "10"
+
+define void @test1(i32 %x) nounwind uwtable ssp {
+entry:
+ switch i32 %x, label %if.end7 [
+ i32 0, label %if.then
+ i32 10, label %if.then2
+ i32 20, label %if.then5
+ ]
+
+; CHECK: test1:
+; CHECK-NOT: unr
+; CHECK: cmpl $10
+; CHECK: bar
+; CHECK: cmpl $20
+
+if.then:
+ tail call void @unr(i32 23) noreturn nounwind
+ unreachable
+
+if.then2:
+ tail call void @bar(i32 42) nounwind
+ br label %if.end7
+
+if.then5:
+ tail call void @unr(i32 5) noreturn nounwind
+ unreachable
+
+if.end7:
+ ret void
+}
+
+declare void @unr(i32) noreturn
+
+declare void @bar(i32)
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
new file mode 100644
index 000000000000..70307534156e
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.4.0"
+
+declare i64 @testi()
+
+define i64 @test_trivial() {
+ %A = tail call i64 @testi()
+ ret i64 %A
+}
+; CHECK: test_trivial:
+; CHECK: jmp _testi ## TAILCALL
+
+
+define i64 @test_noop_bitcast() {
+ %A = tail call i64 @testi()
+ %B = bitcast i64 %A to i64
+ ret i64 %B
+}
+; CHECK: test_noop_bitcast:
+; CHECK: jmp _testi ## TAILCALL
+
+
+; Tail call shouldn't be blocked by no-op inttoptr.
+define i8* @test_inttoptr() {
+ %A = tail call i64 @testi()
+ %B = inttoptr i64 %A to i8*
+ ret i8* %B
+}
+
+; CHECK: test_inttoptr:
+; CHECK: jmp _testi ## TAILCALL
+
+
+declare <4 x float> @testv()
+
+define <4 x i32> @test_vectorbitcast() {
+ %A = tail call <4 x float> @testv()
+ %B = bitcast <4 x float> %A to <4 x i32>
+ ret <4 x i32> %B
+}
+; CHECK: test_vectorbitcast:
+; CHECK: jmp _testv ## TAILCALL
+
+
+declare { i64, i64 } @testp()
+
+define {i64, i64} @test_pair_trivial() {
+ %A = tail call { i64, i64} @testp()
+ ret { i64, i64} %A
+}
+; CHECK: test_pair_trivial:
+; CHECK: jmp _testp ## TAILCALL
+
+
+
+define {i64, i64} @test_pair_trivial_extract() {
+ %A = tail call { i64, i64} @testp()
+ %x = extractvalue { i64, i64} %A, 0
+ %y = extractvalue { i64, i64} %A, 1
+
+ %b = insertvalue {i64, i64} undef, i64 %x, 0
+ %c = insertvalue {i64, i64} %b, i64 %y, 1
+
+ ret { i64, i64} %c
+}
+
+; CHECK: test_pair_trivial_extract:
+; CHECK: jmp _testp ## TAILCALL
+
+define {i8*, i64} @test_pair_conv_extract() {
+ %A = tail call { i64, i64} @testp()
+ %x = extractvalue { i64, i64} %A, 0
+ %y = extractvalue { i64, i64} %A, 1
+
+ %x1 = inttoptr i64 %x to i8*
+
+ %b = insertvalue {i8*, i64} undef, i8* %x1, 0
+ %c = insertvalue {i8*, i64} %b, i64 %y, 1
+
+ ret { i8*, i64} %c
+}
+
+; CHECK: test_pair_conv_extract:
+; CHECK: jmp _testp ## TAILCALL
+
+
+
+; PR13006
+define { i64, i64 } @crash(i8* %this) {
+ %c = tail call { i64, i64 } @testp()
+ %mrv7 = insertvalue { i64, i64 } %c, i64 undef, 1
+ ret { i64, i64 } %mrv7
+}
+
+
diff --git a/test/CodeGen/X86/tailcall-cgp-dup.ll b/test/CodeGen/X86/tailcall-cgp-dup.ll
new file mode 100644
index 000000000000..a80b90f9eee2
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Teach CGP to dup returns to enable tail call optimization.
+; rdar://9147433
+
+define i32 @foo(i32 %x) nounwind ssp {
+; CHECK: foo:
+entry:
+ switch i32 %x, label %return [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb5
+ i32 5, label %sw.bb7
+ i32 6, label %sw.bb9
+ ]
+
+sw.bb: ; preds = %entry
+; CHECK: jmp _f1
+ %call = tail call i32 @f1() nounwind
+ br label %return
+
+sw.bb1: ; preds = %entry
+; CHECK: jmp _f2
+ %call2 = tail call i32 @f2() nounwind
+ br label %return
+
+sw.bb3: ; preds = %entry
+; CHECK: jmp _f3
+ %call4 = tail call i32 @f3() nounwind
+ br label %return
+
+sw.bb5: ; preds = %entry
+; CHECK: jmp _f4
+ %call6 = tail call i32 @f4() nounwind
+ br label %return
+
+sw.bb7: ; preds = %entry
+; CHECK: jmp _f5
+ %call8 = tail call i32 @f5() nounwind
+ br label %return
+
+sw.bb9: ; preds = %entry
+; CHECK: jmp _f6
+ %call10 = tail call i32 @f6() nounwind
+ br label %return
+
+return: ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb
+ %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @f1()
+
+declare i32 @f2()
+
+declare i32 @f3()
+
+declare i32 @f4()
+
+declare i32 @f5()
+
+declare i32 @f6()
+
+; rdar://11958338
+%0 = type opaque
+
+declare i8* @bar(i8*) uwtable optsize noinline ssp
+
+define hidden %0* @thingWithValue(i8* %self) uwtable ssp {
+entry:
+; CHECK: thingWithValue:
+; CHECK: jmp _bar
+ br i1 undef, label %if.then.i, label %if.else.i
+
+if.then.i: ; preds = %entry
+ br label %someThingWithValue.exit
+
+if.else.i: ; preds = %entry
+ %call4.i = tail call i8* @bar(i8* undef) optsize
+ br label %someThingWithValue.exit
+
+someThingWithValue.exit: ; preds = %if.else.i, %if.then.i
+ %retval.0.in.i = phi i8* [ undef, %if.then.i ], [ %call4.i, %if.else.i ]
+ %retval.0.i = bitcast i8* %retval.0.in.i to %0*
+ ret %0* %retval.0.i
+}
diff --git a/test/CodeGen/X86/tailcall-i1.ll b/test/CodeGen/X86/tailcall-i1.ll
deleted file mode 100644
index 8ef1f11383be..000000000000
--- a/test/CodeGen/X86/tailcall-i1.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-define fastcc i1 @i1test(i32, i32, i32, i32) {
- entry:
- %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
- ret i1 %4
-}
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index c3f4278aecbe..e9b8721e6608 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -49,6 +49,11 @@ define fastcc i32 @direct_manyargs() {
; CHECK: pushq
; Pass the stack argument.
; CHECK: movl $7, 16(%rsp)
+; This is the large code model, so &manyargs_callee may not fit into
+; the jmp instruction. Put it into a register which won't be clobbered
+; while restoring callee-saved registers and won't be used for passing
+; arguments.
+; CHECK: movabsq $manyargs_callee, %rax
; Pass the register arguments, in the right registers.
; CHECK: movl $1, %edi
; CHECK: movl $2, %esi
@@ -56,11 +61,6 @@ define fastcc i32 @direct_manyargs() {
; CHECK: movl $4, %ecx
; CHECK: movl $5, %r8d
; CHECK: movl $6, %r9d
-; This is the large code model, so &manyargs_callee may not fit into
-; the jmp instruction. Put it into R11, which won't be clobbered
-; while restoring callee-saved registers and won't be used for passing
-; arguments.
-; CHECK: movabsq $manyargs_callee, %rax
; Adjust the stack to "return".
; CHECK: popq
; And tail-call to the target.
diff --git a/test/CodeGen/X86/tailcall-void.ll b/test/CodeGen/X86/tailcall-void.ll
deleted file mode 100644
index 4e578d1b6410..000000000000
--- a/test/CodeGen/X86/tailcall-void.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-define fastcc void @i1test(i32, i32, i32, i32) {
- entry:
- tail call fastcc void @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
- ret void
-}
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall.ll
index f7ff5d5308d6..36a38e0b69d0 100644
--- a/test/CodeGen/X86/tailcall1.ll
+++ b/test/CodeGen/X86/tailcall.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 5
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 7
; With -tailcallopt, CodeGen guarantees a tail call optimization
; for all of these.
@@ -38,3 +38,15 @@ define fastcc i32 @noret() nounwind {
tail call fastcc void @does_not_return()
unreachable
}
+
+define fastcc void @void_test(i32, i32, i32, i32) {
+ entry:
+ tail call fastcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3)
+ ret void
+}
+
+define fastcc i1 @i1test(i32, i32, i32, i32) {
+ entry:
+ %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+ ret i1 %4
+}
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 03d6f9411e68..118eee6ba6cd 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
+; RUN: llc < %s -march=x86 -tailcallopt | grep "movl[[:space:]]*4(%esp), %eax" | count 1
%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/targetLoweringGeneric.ll b/test/CodeGen/X86/targetLoweringGeneric.ll
new file mode 100644
index 000000000000..ba5f8f83619f
--- /dev/null
+++ b/test/CodeGen/X86/targetLoweringGeneric.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s
+
+; Gather non-machine specific tests for the transformations in
+; CodeGen/SelectionDAG/TargetLowering. Currently, these
+; can't be tested easily by checking the SDNodes that are
+; the data structures that these transformations act on.
+; Therefore, use X86 assembler output to check against.
+
+; rdar://11195364 A problem with the transformation:
+; If all of the demanded bits on one side are known, and all of the set
+; bits on that side are also known to be set on the other side, turn this
+; into an AND, as we know the bits will be cleared.
+; The known set (one) bits for the arguments of %xor1 are not the same, so the
+; transformation should not occur.
+define void @foo(i32 %i32In1, i32 %i32In2, i32 %i32In3, i32 %i32In4,
+ i32 %i32In5, i32 %i32In6, i32* %i32StarOut, i1 %i1In1,
+ i32* %i32SelOut) nounwind {
+ %and3 = and i32 %i32In1, 1362779777
+ %or2 = or i32 %i32In2, %i32In3
+ %and2 = and i32 %or2, 1362779777
+ %xor3 = xor i32 %and3, %and2
+ ; CHECK: shll
+ %shl1 = shl i32 %xor3, %i32In4
+ %sub1 = sub i32 %or2, %shl1
+ %add1 = add i32 %sub1, %i32In5
+ %and1 = and i32 %add1, 1
+ %xor2 = xor i32 %and1, 1
+ %or1 = or i32 %xor2, 364806994 ;0x15BE8352
+ ; CHECK-NOT: andl $96239955
+ %xor1 = xor i32 %or1, 268567040 ;0x10020200
+ ; force an output so not DCE'd
+ store i32 %xor1, i32* %i32StarOut
+ ; force not fast isel by using a select
+ %i32SelVal = select i1 %i1In1, i32 %i32In1, i32 %xor1
+ store i32 %i32SelVal, i32* %i32SelOut
+ ; CHECK: ret
+ ret void
+}
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
index a7be48355f69..0507cb890cd2 100644
--- a/test/CodeGen/X86/thiscall-struct-return.ll
+++ b/test/CodeGen/X86/thiscall-struct-return.ll
@@ -10,7 +10,7 @@ declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result
define void @testv() nounwind {
; CHECK: testv:
-; CHECK: leal
+; CHECK: leal 16(%esp), %esi
; CHECK-NEXT: movl %esi, (%esp)
; CHECK-NEXT: calll _ZN1CC1Ev
; CHECK: leal 8(%esp), %eax
@@ -29,7 +29,7 @@ entry:
define void @test2v() nounwind {
; CHECK: test2v:
-; CHECK: leal
+; CHECK: leal 16(%esp), %esi
; CHECK-NEXT: movl %esi, (%esp)
; CHECK-NEXT: calll _ZN1CC1Ev
; CHECK: leal 8(%esp), %eax
diff --git a/test/CodeGen/X86/tls-local-dynamic.ll b/test/CodeGen/X86/tls-local-dynamic.ll
new file mode 100644
index 000000000000..c5fd16bbec22
--- /dev/null
+++ b/test/CodeGen/X86/tls-local-dynamic.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck %s
+
+@x = internal thread_local global i32 0, align 4
+@y = internal thread_local global i32 0, align 4
+
+; get_x and get_y are here to prevent x and y from being optimized away as 0
+
+define i32* @get_x() {
+entry:
+ ret i32* @x
+; FIXME: This function uses a single thread-local variable,
+; so we might want to fall back to general-dynamic here.
+; CHECK: get_x:
+; CHECK: leaq x@TLSLD(%rip), %rdi
+; CHECK-NEXT: callq __tls_get_addr@PLT
+; CHECK: x@DTPOFF
+}
+
+define i32* @get_y() {
+entry:
+ ret i32* @y
+}
+
+define i32 @f(i32 %i) {
+entry:
+ %cmp = icmp eq i32 %i, 1
+ br i1 %cmp, label %return, label %if.else
+; This bb does not access TLS, so should not call __tls_get_addr.
+; CHECK: f:
+; CHECK-NOT: __tls_get_addr
+; CHECK: je
+
+
+if.else:
+ %0 = load i32* @x, align 4
+ %cmp1 = icmp eq i32 %i, 2
+ br i1 %cmp1, label %if.then2, label %return
+; Now we call __tls_get_addr.
+; CHECK: # %if.else
+; CHECK: leaq x@TLSLD(%rip), %rdi
+; CHECK-NEXT: callq __tls_get_addr@PLT
+; CHECK: x@DTPOFF
+
+
+if.then2:
+ %1 = load i32* @y, align 4
+ %add = add nsw i32 %1, %0
+ br label %return
+; This accesses TLS, but is dominated by the previous block,
+; so should not have to call __tls_get_addr again.
+; CHECK: # %if.then2
+; CHECK-NOT: __tls_get_addr
+; CHECK: y@DTPOFF
+
+
+return:
+ %retval.0 = phi i32 [ %add, %if.then2 ], [ 5, %entry ], [ %0, %if.else ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/tls-models.ll b/test/CodeGen/X86/tls-models.ll
new file mode 100644
index 000000000000..7c527e210a90
--- /dev/null
+++ b/test/CodeGen/X86/tls-models.ll
@@ -0,0 +1,166 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64_PIC %s
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32_PIC %s
+
+; Darwin always uses the same model.
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=DARWIN %s
+
+@external_gd = external thread_local global i32
+@internal_gd = internal thread_local global i32 42
+
+@external_ld = external thread_local(localdynamic) global i32
+@internal_ld = internal thread_local(localdynamic) global i32 42
+
+@external_ie = external thread_local(initialexec) global i32
+@internal_ie = internal thread_local(initialexec) global i32 42
+
+@external_le = external thread_local(localexec) global i32
+@internal_le = internal thread_local(localexec) global i32 42
+
+; ----- no model specified -----
+
+define i32* @f1() {
+entry:
+ ret i32* @external_gd
+
+ ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
+ ; X64: f1:
+ ; X64: external_gd@GOTTPOFF
+ ; X32: f1:
+ ; X32: external_gd@INDNTPOFF
+ ; X64_PIC: f1:
+ ; X64_PIC: external_gd@TLSGD
+ ; X32_PIC: f1:
+ ; X32_PIC: external_gd@TLSGD
+ ; DARWIN: f1:
+ ; DARWIN: _external_gd@TLVP
+}
+
+define i32* @f2() {
+entry:
+ ret i32* @internal_gd
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic.
+ ; X64: f2:
+ ; X64: internal_gd@TPOFF
+ ; X32: f2:
+ ; X32: internal_gd@NTPOFF
+ ; X64_PIC: f2:
+ ; X64_PIC: internal_gd@TLSLD
+ ; X32_PIC: f2:
+ ; X32_PIC: internal_gd@TLSLDM
+ ; DARWIN: f2:
+ ; DARWIN: _internal_gd@TLVP
+}
+
+
+; ----- localdynamic specified -----
+
+define i32* @f3() {
+entry:
+ ret i32* @external_ld
+
+ ; Non-PIC code can use initial exec, PIC code uses local dynamic as specified.
+ ; X64: f3:
+ ; X64: external_ld@GOTTPOFF
+ ; X32: f3:
+ ; X32: external_ld@INDNTPOFF
+ ; X64_PIC: f3:
+ ; X64_PIC: external_ld@TLSLD
+ ; X32_PIC: f3:
+ ; X32_PIC: external_ld@TLSLDM
+ ; DARWIN: f3:
+ ; DARWIN: _external_ld@TLVP
+}
+
+define i32* @f4() {
+entry:
+ ret i32* @internal_ld
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic.
+ ; X64: f4:
+ ; X64: internal_ld@TPOFF
+ ; X32: f4:
+ ; X32: internal_ld@NTPOFF
+ ; X64_PIC: f4:
+ ; X64_PIC: internal_ld@TLSLD
+ ; X32_PIC: f4:
+ ; X32_PIC: internal_ld@TLSLDM
+ ; DARWIN: f4:
+ ; DARWIN: _internal_ld@TLVP
+}
+
+
+; ----- initialexec specified -----
+
+define i32* @f5() {
+entry:
+ ret i32* @external_ie
+
+ ; Non-PIC and PIC code will use initial exec as specified.
+ ; X64: f5:
+ ; X64: external_ie@GOTTPOFF
+ ; X32: f5:
+ ; X32: external_ie@INDNTPOFF
+ ; X64_PIC: f5:
+ ; X64_PIC: external_ie@GOTTPOFF
+ ; X32_PIC: f5:
+ ; X32_PIC: external_ie@GOTNTPOFF
+ ; DARWIN: f5:
+ ; DARWIN: _external_ie@TLVP
+}
+
+define i32* @f6() {
+entry:
+ ret i32* @internal_ie
+
+ ; Non-PIC code can use local exec, PIC code uses initial exec as specified.
+ ; X64: f6:
+ ; X64: internal_ie@TPOFF
+ ; X32: f6:
+ ; X32: internal_ie@NTPOFF
+ ; X64_PIC: f6:
+ ; X64_PIC: internal_ie@GOTTPOFF
+ ; X32_PIC: f6:
+ ; X32_PIC: internal_ie@GOTNTPOFF
+ ; DARWIN: f6:
+ ; DARWIN: _internal_ie@TLVP
+}
+
+
+; ----- localexec specified -----
+
+define i32* @f7() {
+entry:
+ ret i32* @external_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; X64: f7:
+ ; X64: external_le@TPOFF
+ ; X32: f7:
+ ; X32: external_le@NTPOFF
+ ; X64_PIC: f7:
+ ; X64_PIC: external_le@TPOFF
+ ; X32_PIC: f7:
+ ; X32_PIC: external_le@NTPOFF
+ ; DARWIN: f7:
+ ; DARWIN: _external_le@TLVP
+}
+
+define i32* @f8() {
+entry:
+ ret i32* @internal_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; X64: f8:
+ ; X64: internal_le@TPOFF
+ ; X32: f8:
+ ; X32: internal_le@NTPOFF
+ ; X64_PIC: f8:
+ ; X64_PIC: internal_le@TPOFF
+ ; X32_PIC: f8:
+ ; X32_PIC: internal_le@NTPOFF
+ ; DARWIN: f8:
+ ; DARWIN: _internal_le@TLVP
+}
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index b83416d4b32b..51c3d2363f8b 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -2,6 +2,8 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
@i = thread_local global i32 15
+@j = internal thread_local global i32 42
+@k = internal thread_local global i32 42
define i32 @f1() {
entry:
@@ -64,4 +66,22 @@ entry:
; X64: callq __tls_get_addr@PLT
+define i32 @f5() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @k, align 4
+ %add = add nsw i32 %0, %1
+ ret i32 %add
+}
+; X32: f5:
+; X32: leal {{[jk]}}@TLSLDM(%ebx)
+; X32-NEXT: calll ___tls_get_addr@PLT
+; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax)
+; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax)
+
+; X64: f5:
+; X64: leaq {{[jk]}}@TLSLD(%rip), %rdi
+; X64-NEXT: callq __tls_get_addr@PLT
+; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax)
+; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax)
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
index e2e58a541a4c..3fca9f5a3791 100644
--- a/test/CodeGen/X86/tls-pie.ll
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: llc < %s -march=x86 -mcpu=generic -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
; RUN: | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: llc < %s -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
; RUN: | FileCheck -check-prefix=X64 %s
@i = thread_local global i32 15
@@ -35,7 +35,12 @@ entry:
define i32 @f3() {
; X32: f3:
-; X32: movl i2@INDNTPOFF, %eax
+; X32: calll .L{{[0-9]+}}$pb
+; X32-NEXT: .L{{[0-9]+}}$pb:
+; X32-NEXT: popl %eax
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %eax
+; X32-NEXT: movl i2@GOTNTPOFF(%eax), %eax
; X32-NEXT: movl %gs:(%eax), %eax
; X32-NEXT: ret
; X64: f3:
@@ -50,8 +55,13 @@ entry:
define i32* @f4() {
; X32: f4:
-; X32: movl %gs:0, %eax
-; X32-NEXT: addl i2@INDNTPOFF, %eax
+; X32: calll .L{{[0-9]+}}$pb
+; X32-NEXT: .L{{[0-9]+}}$pb:
+; X32-NEXT: popl %ecx
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %ecx
+; X32-NEXT: movl %gs:0, %eax
+; X32-NEXT: addl i2@GOTNTPOFF(%ecx), %eax
; X32-NEXT: ret
; X64: f4:
; X64: movq %fs:0, %rax
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
index 03ae6bfc869e..3f44be0b500c 100644
--- a/test/CodeGen/X86/trap.ll
+++ b/test/CodeGen/X86/trap.ll
@@ -1,9 +1,21 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep ud2
-define i32 @test() noreturn nounwind {
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+
+; CHECK: test0:
+; CHECK: ud2
+define i32 @test0() noreturn nounwind {
entry:
tail call void @llvm.trap( )
unreachable
}
+; CHECK: test1:
+; CHECK: int3
+define i32 @test1() noreturn nounwind {
+entry:
+ tail call void @llvm.debugtrap( )
+ unreachable
+}
+
declare void @llvm.trap() nounwind
+declare void @llvm.debugtrap() nounwind
diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll
index 57d6e97767b8..9877d7be169b 100644
--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
;CHECK: load_2_i8
; A single 16-bit load
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index 6f16a2548aa6..af6d47af7a0f 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
-; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \
+; RUN: grep "twoaddrinstr" | grep "Number of instructions aggressively commuted"
; rdar://6480363
target triple = "i386-apple-darwin9.6"
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
index 077fee077392..513c304e3bf8 100644
--- a/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk"
define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind {
entry:
diff --git a/test/CodeGen/X86/uint_to_fp.ll b/test/CodeGen/X86/uint_to_fp.ll
index 41ee1947edc4..0536eb05222c 100644
--- a/test/CodeGen/X86/uint_to_fp.ll
+++ b/test/CodeGen/X86/uint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp}
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep "sub.*esp"
; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss
; rdar://6034396
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
index 7416051693be..56fdadbf937b 100644
--- a/test/CodeGen/X86/umul-with-carry.ll
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {jc} | count 1
+; RUN: llc < %s -march=x86 | grep "jc" | count 1
; XFAIL: *
; FIXME: umul-with-overflow not supported yet.
diff --git a/test/CodeGen/X86/unwindraise.ll b/test/CodeGen/X86/unwindraise.ll
new file mode 100644
index 000000000000..a438723d9bd4
--- /dev/null
+++ b/test/CodeGen/X86/unwindraise.ll
@@ -0,0 +1,252 @@
+; RUN: llc < %s -verify-machineinstrs
+; PR13188
+;
+; The _Unwind_RaiseException function can return normally and via eh.return.
+; This causes confusion about the function live-out registers, since the two
+; different ways of returning have different return values.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-freebsd9.0"
+
+%struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i64, i64, i64, [18 x i8] }
+%struct.dwarf_eh_bases = type { i8*, i8*, i8* }
+%struct._Unwind_FrameState = type { %struct.frame_state_reg_info, i64, i64, i8*, i32, i8*, i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)*, i64, i64, i64, i8, i8, i8, i8, i8* }
+%struct.frame_state_reg_info = type { [18 x %struct.anon], %struct.frame_state_reg_info* }
+%struct.anon = type { %union.anon, i32 }
+%union.anon = type { i64 }
+%struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i64, i64 }
+
+@dwarf_reg_size_table = external hidden unnamed_addr global [18 x i8], align 16
+
+declare void @abort() noreturn
+
+declare fastcc i32 @uw_frame_state_for(%struct._Unwind_Context*, %struct._Unwind_FrameState*) uwtable
+
+define hidden i32 @_Unwind_RaiseException(%struct._Unwind_Exception* %exc) uwtable {
+entry:
+ %fs.i = alloca %struct._Unwind_FrameState, align 8
+ %this_context = alloca %struct._Unwind_Context, align 8
+ %cur_context = alloca %struct._Unwind_Context, align 8
+ %fs = alloca %struct._Unwind_FrameState, align 8
+ call void @llvm.eh.unwind.init()
+ %0 = call i8* @llvm.eh.dwarf.cfa(i32 0)
+ %1 = call i8* @llvm.returnaddress(i32 0)
+ call fastcc void @uw_init_context_1(%struct._Unwind_Context* %this_context, i8* %0, i8* %1)
+ %2 = bitcast %struct._Unwind_Context* %cur_context to i8*
+ %3 = bitcast %struct._Unwind_Context* %this_context to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
+ %personality = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 6
+ %retaddr_column.i = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 9
+ %flags.i.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 5
+ %ra.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 2
+ %exception_class = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 0
+ br label %while.body
+
+while.body: ; preds = %uw_update_context.exit, %entry
+ %call = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
+ switch i32 %call, label %do.end21 [
+ i32 5, label %do.end21.loopexit46
+ i32 0, label %if.end3
+ ]
+
+if.end3: ; preds = %while.body
+ %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8, !tbaa !0
+ %tobool = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %4, null
+ br i1 %tobool, label %if.end13, label %if.then4
+
+if.then4: ; preds = %if.end3
+ %5 = load i64* %exception_class, align 8, !tbaa !3
+ %call6 = call i32 %4(i32 1, i32 1, i64 %5, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
+ switch i32 %call6, label %do.end21.loopexit46 [
+ i32 6, label %while.end
+ i32 8, label %if.end13
+ ]
+
+if.end13: ; preds = %if.then4, %if.end3
+ call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
+ %6 = load i64* %retaddr_column.i, align 8, !tbaa !3
+ %conv.i = trunc i64 %6 to i32
+ %cmp.i.i.i = icmp slt i32 %conv.i, 18
+ br i1 %cmp.i.i.i, label %cond.end.i.i.i, label %cond.true.i.i.i
+
+cond.true.i.i.i: ; preds = %if.end13
+ call void @abort() noreturn
+ unreachable
+
+cond.end.i.i.i: ; preds = %if.end13
+ %sext.i = shl i64 %6, 32
+ %idxprom.i.i.i = ashr exact i64 %sext.i, 32
+ %arrayidx.i.i.i = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i
+ %7 = load i8* %arrayidx.i.i.i, align 1, !tbaa !1
+ %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i
+ %8 = load i8** %arrayidx2.i.i.i, align 8, !tbaa !0
+ %9 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+ %and.i.i.i.i = and i64 %9, 4611686018427387904
+ %tobool.i.i.i = icmp eq i64 %and.i.i.i.i, 0
+ br i1 %tobool.i.i.i, label %if.end.i.i.i, label %land.lhs.true.i.i.i
+
+land.lhs.true.i.i.i: ; preds = %cond.end.i.i.i
+ %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i
+ %10 = load i8* %arrayidx4.i.i.i, align 1, !tbaa !1
+ %tobool6.i.i.i = icmp eq i8 %10, 0
+ br i1 %tobool6.i.i.i, label %if.end.i.i.i, label %if.then.i.i.i
+
+if.then.i.i.i: ; preds = %land.lhs.true.i.i.i
+ %11 = ptrtoint i8* %8 to i64
+ br label %uw_update_context.exit
+
+if.end.i.i.i: ; preds = %land.lhs.true.i.i.i, %cond.end.i.i.i
+ %cmp8.i.i.i = icmp eq i8 %7, 8
+ br i1 %cmp8.i.i.i, label %if.then10.i.i.i, label %cond.true14.i.i.i
+
+if.then10.i.i.i: ; preds = %if.end.i.i.i
+ %12 = bitcast i8* %8 to i64*
+ %13 = load i64* %12, align 8, !tbaa !3
+ br label %uw_update_context.exit
+
+cond.true14.i.i.i: ; preds = %if.end.i.i.i
+ call void @abort() noreturn
+ unreachable
+
+uw_update_context.exit: ; preds = %if.then10.i.i.i, %if.then.i.i.i
+ %retval.0.i.i.i = phi i64 [ %11, %if.then.i.i.i ], [ %13, %if.then10.i.i.i ]
+ %14 = inttoptr i64 %retval.0.i.i.i to i8*
+ store i8* %14, i8** %ra.i, align 8, !tbaa !0
+ br label %while.body
+
+while.end: ; preds = %if.then4
+ %private_1 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 2
+ store i64 0, i64* %private_1, align 8, !tbaa !3
+ %15 = load i8** %ra.i, align 8, !tbaa !0
+ %16 = ptrtoint i8* %15 to i64
+ %private_2 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 3
+ store i64 %16, i64* %private_2, align 8, !tbaa !3
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
+ %17 = bitcast %struct._Unwind_FrameState* %fs.i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %17)
+ %personality.i = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 6
+ %retaddr_column.i22 = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 9
+ br label %while.body.i
+
+while.body.i: ; preds = %uw_update_context.exit44, %while.end
+ %call.i = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
+ %18 = load i8** %ra.i, align 8, !tbaa !0
+ %19 = ptrtoint i8* %18 to i64
+ %20 = load i64* %private_2, align 8, !tbaa !3
+ %cmp.i = icmp eq i64 %19, %20
+ %cmp2.i = icmp eq i32 %call.i, 0
+ br i1 %cmp2.i, label %if.end.i, label %do.end21
+
+if.end.i: ; preds = %while.body.i
+ %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8, !tbaa !0
+ %tobool.i = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %21, null
+ br i1 %tobool.i, label %if.end12.i, label %if.then3.i
+
+if.then3.i: ; preds = %if.end.i
+ %or.i = select i1 %cmp.i, i32 6, i32 2
+ %22 = load i64* %exception_class, align 8, !tbaa !3
+ %call5.i = call i32 %21(i32 1, i32 %or.i, i64 %22, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
+ switch i32 %call5.i, label %do.end21 [
+ i32 7, label %do.body19
+ i32 8, label %if.end12.i
+ ]
+
+if.end12.i: ; preds = %if.then3.i, %if.end.i
+ br i1 %cmp.i, label %cond.true.i, label %cond.end.i
+
+cond.true.i: ; preds = %if.end12.i
+ call void @abort() noreturn
+ unreachable
+
+cond.end.i: ; preds = %if.end12.i
+ call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
+ %23 = load i64* %retaddr_column.i22, align 8, !tbaa !3
+ %conv.i23 = trunc i64 %23 to i32
+ %cmp.i.i.i24 = icmp slt i32 %conv.i23, 18
+ br i1 %cmp.i.i.i24, label %cond.end.i.i.i33, label %cond.true.i.i.i25
+
+cond.true.i.i.i25: ; preds = %cond.end.i
+ call void @abort() noreturn
+ unreachable
+
+cond.end.i.i.i33: ; preds = %cond.end.i
+ %sext.i26 = shl i64 %23, 32
+ %idxprom.i.i.i27 = ashr exact i64 %sext.i26, 32
+ %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27
+ %24 = load i8* %arrayidx.i.i.i28, align 1, !tbaa !1
+ %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27
+ %25 = load i8** %arrayidx2.i.i.i29, align 8, !tbaa !0
+ %26 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+ %and.i.i.i.i31 = and i64 %26, 4611686018427387904
+ %tobool.i.i.i32 = icmp eq i64 %and.i.i.i.i31, 0
+ br i1 %tobool.i.i.i32, label %if.end.i.i.i39, label %land.lhs.true.i.i.i36
+
+land.lhs.true.i.i.i36: ; preds = %cond.end.i.i.i33
+ %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27
+ %27 = load i8* %arrayidx4.i.i.i34, align 1, !tbaa !1
+ %tobool6.i.i.i35 = icmp eq i8 %27, 0
+ br i1 %tobool6.i.i.i35, label %if.end.i.i.i39, label %if.then.i.i.i37
+
+if.then.i.i.i37: ; preds = %land.lhs.true.i.i.i36
+ %28 = ptrtoint i8* %25 to i64
+ br label %uw_update_context.exit44
+
+if.end.i.i.i39: ; preds = %land.lhs.true.i.i.i36, %cond.end.i.i.i33
+ %cmp8.i.i.i38 = icmp eq i8 %24, 8
+ br i1 %cmp8.i.i.i38, label %if.then10.i.i.i40, label %cond.true14.i.i.i41
+
+if.then10.i.i.i40: ; preds = %if.end.i.i.i39
+ %29 = bitcast i8* %25 to i64*
+ %30 = load i64* %29, align 8, !tbaa !3
+ br label %uw_update_context.exit44
+
+cond.true14.i.i.i41: ; preds = %if.end.i.i.i39
+ call void @abort() noreturn
+ unreachable
+
+uw_update_context.exit44: ; preds = %if.then10.i.i.i40, %if.then.i.i.i37
+ %retval.0.i.i.i42 = phi i64 [ %28, %if.then.i.i.i37 ], [ %30, %if.then10.i.i.i40 ]
+ %31 = inttoptr i64 %retval.0.i.i.i42 to i8*
+ store i8* %31, i8** %ra.i, align 8, !tbaa !0
+ br label %while.body.i
+
+do.body19: ; preds = %if.then3.i
+ call void @llvm.lifetime.end(i64 -1, i8* %17)
+ %call20 = call fastcc i64 @uw_install_context_1(%struct._Unwind_Context* %this_context, %struct._Unwind_Context* %cur_context)
+ %32 = load i8** %ra.i, align 8, !tbaa !0
+ call void @llvm.eh.return.i64(i64 %call20, i8* %32)
+ unreachable
+
+do.end21.loopexit46: ; preds = %if.then4, %while.body
+ %retval.0.ph = phi i32 [ 3, %if.then4 ], [ 5, %while.body ]
+ br label %do.end21
+
+do.end21: ; preds = %do.end21.loopexit46, %if.then3.i, %while.body.i, %while.body
+ %retval.0 = phi i32 [ %retval.0.ph, %do.end21.loopexit46 ], [ 3, %while.body ], [ 2, %while.body.i ], [ 2, %if.then3.i ]
+ ret i32 %retval.0
+}
+
+declare void @llvm.eh.unwind.init() nounwind
+
+declare fastcc void @uw_init_context_1(%struct._Unwind_Context*, i8*, i8*) uwtable
+
+declare i8* @llvm.eh.dwarf.cfa(i32) nounwind
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare fastcc i64 @uw_install_context_1(%struct._Unwind_Context*, %struct._Unwind_Context*) uwtable
+
+declare void @llvm.eh.return.i64(i64, i8*) nounwind
+
+declare fastcc void @uw_update_context_1(%struct._Unwind_Context*, %struct._Unwind_FrameState* nocapture) uwtable
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
index ae3f55a316fa..569586af4983 100644
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ b/test/CodeGen/X86/v-binop-widen2.ll
@@ -1,9 +1,16 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
%vec = type <6 x float>
; CHECK: divss
; CHECK: divss
; CHECK: divps
+
+; Scheduler causes a different instruction order to be produced on Intel Atom
+; ATOM: divps
+; ATOM: divss
+; ATOM: divss
+
define %vec @vecdiv( %vec %p1, %vec %p2)
{
%result = fdiv %vec %p1, %p2
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index f2fc7e7d9d5d..e0862ca8d1c4 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
-; RUN: grep {subl.*60}
+; RUN: grep "subl.*60"
; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
-; RUN: grep {movaps.*32}
+; RUN: grep "movaps.*32"
define void @test() {
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
new file mode 100644
index 000000000000..08eb16f6313b
--- /dev/null
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;CHECK: foo1_8
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <8 x float> @foo1_8(<8 x i8> %src) {
+ %res = sitofp <8 x i8> %src to <8 x float>
+ ret <8 x float> %res
+}
+
+;CHECK: foo1_4
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <4 x float> @foo1_4(<4 x i8> %src) {
+ %res = sitofp <4 x i8> %src to <4 x float>
+ ret <4 x float> %res
+}
+
+;CHECK: foo2_8
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <8 x float> @foo2_8(<8 x i8> %src) {
+ %res = uitofp <8 x i8> %src to <8 x float>
+ ret <8 x float> %res
+}
+
+;CHECK: foo2_4
+;CHECK: vcvtdq2ps
+;CHECK: ret
+define <4 x float> @foo2_4(<4 x i8> %src) {
+ %res = uitofp <4 x i8> %src to <4 x float>
+ ret <4 x float> %res
+}
+
+;CHECK: foo3_8
+;CHECK: vcvttps2dq
+;CHECK: ret
+define <8 x i8> @foo3_8(<8 x float> %src) {
+ %res = fptosi <8 x float> %src to <8 x i8>
+ ret <8 x i8> %res
+}
+;CHECK: foo3_4
+;CHECK: vcvttps2dq
+;CHECK: ret
+define <4 x i8> @foo3_4(<4 x float> %src) {
+ %res = fptosi <4 x float> %src to <4 x i8>
+ ret <4 x i8> %res
+}
+
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 91777f7aa6b4..46d6a23554f4 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -10,8 +10,7 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
entry:
; CHECK: cfi_def_cfa_offset
; CHECK-NOT: set
-; CHECK: movzwl
-; CHECK: movzwl
+; CHECK: punpcklwd
; CHECK: pshufd
; CHECK: pshufb
%shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index 39c9b770d5f4..367dd27f3076 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
index 29511934af01..565be7a6cc70 100644
--- a/test/CodeGen/X86/vec_ins_extract-1.ll
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep "(%esp,%eax,4)" | count 4
; Inserts and extracts with variable indices must be lowered
; to memory accesses.
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index de3b36ff126c..2a4864a48a25 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
define <4 x float> @t3(<4 x float>* %P) nounwind {
%tmp1 = load <4 x float>* %P
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
index ada17e0092a8..d1d7608a0411 100644
--- a/test/CodeGen/X86/vec_set-3.ll
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -o %t
; RUN: grep pshufd %t | count 2
define <4 x float> @test(float %a) nounwind {
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index 3656e5f6ca47..b8ec0cf08095 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 | grep movd | count 1
-; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
+; RUN: llc < %s -march=x86-64 | grep "movlhps.*%xmm0, %xmm0"
define <2 x i64> @test3(i64 %A) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index 06f38ed84272..09d4c1a64a01 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
; sse: t1:
; sse2: t1:
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index 861a1cc5b93c..b26f920e5e23 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
; PR2485
define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index dec98c7400a5..0aff822850c0 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
; ModuleID = 'vec_shuffle-27.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -35,4 +35,4 @@ entry:
store <4 x i64> %vect1487, <4 x i64>* %ap
store <4 x i64> %vect1488, <4 x i64>* %bp
ret void;
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll
index 7f0fcb5969e4..f5083b4b8011 100644
--- a/test/CodeGen/X86/vec_shuffle-35.ll
+++ b/test/CodeGen/X86/vec_shuffle-35.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
-; RUN: grep pextrw %t | count 13
-; RUN: grep pinsrw %t | count 14
+; RUN: grep pextrw %t | count 12
+; RUN: grep pinsrw %t | count 13
; RUN: grep rolw %t | count 13
; RUN: not grep esp %t
; RUN: not grep ebp %t
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
index 8090afc7434d..9a06015745ed 100644
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse41 | FileCheck %s
define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; CHECK: pshufb
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 430aa046afab..ed285f93fe1b 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=core2 | FileCheck %s
; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
index 96ef883c4e1e..ec196df7aeff 100644
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
define <2 x double> @ld(<2 x double> %p) nounwind optsize ssp {
; CHECK: unpcklpd
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
index 55531e305cb8..ee8d2d5e0b3e 100644
--- a/test/CodeGen/X86/vec_shuffle-39.ll
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s
; rdar://10050222, rdar://10134392
define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index cde5ae99563e..f105de4d977d 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd | count 1
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd | count 1
define void @test(<2 x i64>* %P, i8 %x) nounwind {
%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index 649b85c5dadd..feacc42406df 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
; RUN: grep punpcklwd %t | count 4
; RUN: grep punpckhwd %t | count 4
; RUN: grep "pshufd" %t | count 8
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
index d9941e65bde3..374acfa4e094 100644
--- a/test/CodeGen/X86/vec_splat-4.ll
+++ b/test/CodeGen/X86/vec_splat-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
; RUN: grep punpcklbw %t | count 16
; RUN: grep punpckhbw %t | count 16
; RUN: grep "pshufd" %t | count 16
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index a87fbd0dc655..24d8487f17bd 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
-; RUN: llc < %s -march=x86 -mattr=+sse3 | grep movddup
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse3 | grep movddup
define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 3bd3f7b60b3b..c294df575a10 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -70,3 +70,17 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
; CHECK: call
; CHECK: roundss $4, %xmm{{.*}}, %xmm0
}
+
+; PR13576
+define <2 x double> @test5() nounwind uwtable readnone noinline {
+entry:
+ %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double
+4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
+ ret <2 x double> %0
+; CHECK: test5:
+; CHECK: mov
+; CHECK: mov
+; CHECK: cvtsi2sd
+}
+
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index 49551562c5ae..e775750bbea5 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
entry:
; CHECK: shift1b:
; CHECK: movd
-; CHECK-NEXT: psllq
+; CHECK: psllq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%shl = shl <2 x i64> %val, %1
@@ -38,7 +38,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
; CHECK: shift2b:
; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 9a9b419abea5..9496893bd1a7 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
entry:
; CHECK: shift1b:
; CHECK: movd
-; CHECK-NEXT: psrlq
+; CHECK: psrlq
%0 = insertelement <2 x i64> undef, i64 %amt, i32 0
%1 = insertelement <2 x i64> %0, i64 %amt, i32 1
%lshr = lshr <2 x i64> %val, %1
@@ -37,7 +37,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
; CHECK: shift2b:
; CHECK: movd
-; CHECK-NEXT: psrld
+; CHECK: psrld
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -63,7 +63,7 @@ entry:
; CHECK: shift3b:
; CHECK: movzwl
; CHECK: movd
-; CHECK-NEXT: psrlw
+; CHECK: psrlw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 8e8a9aa04b27..b2b48b9da935 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -28,7 +28,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
; CHECK: shift2b:
; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
%0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%1 = insertelement <4 x i32> %0, i32 %amt, i32 1
%2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -52,7 +52,7 @@ entry:
; CHECK: shift3b:
; CHECK: movzwl
; CHECK: movd
-; CHECK-NEXT: psraw
+; CHECK: psraw
%0 = insertelement <8 x i16> undef, i16 %amt, i32 0
%1 = insertelement <8 x i16> %0, i16 %amt, i32 1
%2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index cb254aeb5735..f6c311dee521 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -6,7 +6,7 @@ define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
entry:
; CHECK: shift5a:
; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
%amt = load i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -20,7 +20,7 @@ define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
entry:
; CHECK: shift5b:
; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
%amt = load i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -34,7 +34,7 @@ define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
; CHECK: shift5c:
; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
%shl = shl <4 x i32> %val, %shamt
@@ -47,7 +47,7 @@ define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
entry:
; CHECK: shift5d:
; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
%shr = ashr <4 x i32> %val, %shamt
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index f55b184f3acc..d86042a44806 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -2,7 +2,6 @@
; CHECK: incl
; CHECK: incl
; CHECK: incl
-; CHECK: addl
; Widen a v3i16 to v8i16 to do a vector add
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 4330aae8ec82..ebdfea9a37f7 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,7 +1,14 @@
-; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse42 < %s | FileCheck -check-prefix=ATOM %s
+
; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movd
+; CHECK: movl
+; CHECK: movlpd
+
+; The scheduler produces a different instruction order for Atom
+; ATOM: movl
+; ATOM: paddd
+; ATOM: movlpd
; bitcast a v4i16 to v2i32
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 5c695ea00033..3979ce466d1e 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
; CHECK: pextrd
; CHECK: pextrd
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index 136578df1e8e..9086d3a9cfd2 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,9 +1,8 @@
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
; CHECK: movl
-; CHECK: movd
+; CHECK: movlpd
; bitcast a i64 to v2i32
-
define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
entry:
%conv = bitcast i64 %src to <2 x i32>
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index affd796ffc3f..1158e0455392 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
; CHECK-NOT: cvtsi2ss
; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll
index 4bcac58f2b6c..86727421ce03 100644
--- a/test/CodeGen/X86/widen_extract-1.ll
+++ b/test/CodeGen/X86/widen_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
; widen extract subvector
define void @convert(<2 x double>* %dst.addr, <3 x double> %src) {
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index 4aeec9136d0e..d5437281b274 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,18 +1,12 @@
; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
-; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64
; PR4891
; Both loads should happen before either store.
-; CHECK: movd ({{.*}}), {{.*}}
-; CHECK: movd ({{.*}}), {{.*}}
-; CHECK: movd {{.*}}, ({{.*}})
-; CHECK: movd {{.*}}, ({{.*}})
-
-; WIN64: movd ({{.*}}), {{.*}}
-; WIN64: movd ({{.*}}), {{.*}}
-; WIN64: movd {{.*}}, ({{.*}})
-; WIN64: movd {{.*}}, ({{.*}})
+; CHECK: movl ({{.*}}), {{.*}}
+; CHECK: movl ({{.*}}), {{.*}}
+; CHECK: movl {{.*}}, ({{.*}})
+; CHECK: movl {{.*}}, ({{.*}})
define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
entry:
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index a961c6af1884..cc11e4c28e21 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,12 +1,9 @@
-; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
; PR8777
; PR8778
-; Passing the same value in two registers creates a false interference that
-; only -join-physregs resolves. It could also be handled by a parallel copy.
-
define i64 @foo(i64 %n, i64 %x) nounwind {
entry:
@@ -31,19 +28,19 @@ entry:
%buf1 = alloca i8, i64 %n, align 1
-; M64: leaq 15(%rcx), %rax
+; M64: leaq 15(%{{.*}}), %rax
; M64: andq $-16, %rax
; M64: callq ___chkstk
; M64-NOT: %rsp
; M64: movq %rsp, %rax
-; W64: leaq 15(%rcx), %rax
+; W64: leaq 15(%{{.*}}), %rax
; W64: andq $-16, %rax
; W64: callq __chkstk
; W64: subq %rax, %rsp
; W64: movq %rsp, %rax
-; EFI: leaq 15(%rcx), [[R1:%r.*]]
+; EFI: leaq 15(%{{.*}}), [[R1:%r.*]]
; EFI: andq $-16, [[R1]]
; EFI: movq %rsp, [[R64:%r.*]]
; EFI: subq [[R1]], [[R64]]
diff --git a/test/CodeGen/X86/x86-64-arg.ll b/test/CodeGen/X86/x86-64-arg.ll
index ec8dd8edb634..9a959e839a95 100644
--- a/test/CodeGen/X86/x86-64-arg.ll
+++ b/test/CodeGen/X86/x86-64-arg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {movl %edi, %eax}
+; RUN: llc < %s | grep "movl %edi, %eax"
; The input value is already sign extended, don't re-extend it.
; This testcase corresponds to:
; int test(short X) { return (int)X; }
diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
index 79316f29de37..902c9d5ae081 100644
--- a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
+++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s | not grep rsp
-; RUN: llc < %s | grep cvttsd2siq
+; RUN: llc < %s -mcpu=nehalem | not grep rsp
+; RUN: llc < %s -mcpu=nehalem | grep cvttsd2siq
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-pic-1.ll b/test/CodeGen/X86/x86-64-pic-1.ll
index 46f6d335d05c..46cd4f81bcf9 100644
--- a/test/CodeGen/X86/x86-64-pic-1.ll
+++ b/test/CodeGen/X86/x86-64-pic-1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq f@PLT} %t1
+; RUN: grep "callq f@PLT" %t1
define void @g() {
entry:
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index b6f82e23b7e7..3ec172b2b656 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq g@PLT} %t1
+; RUN: grep "callq g@PLT" %t1
@g = alias weak i32 ()* @f
diff --git a/test/CodeGen/X86/x86-64-pic-11.ll b/test/CodeGen/X86/x86-64-pic-11.ll
index 4db331cee43f..fd64beb696b2 100644
--- a/test/CodeGen/X86/x86-64-pic-11.ll
+++ b/test/CodeGen/X86/x86-64-pic-11.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq __fixunsxfti@PLT} %t1
+; RUN: grep "callq __fixunsxfti@PLT" %t1
define i128 @f(x86_fp80 %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-64-pic-2.ll b/test/CodeGen/X86/x86-64-pic-2.ll
index 1ce2de7209c4..f3f7b1dffd1f 100644
--- a/test/CodeGen/X86/x86-64-pic-2.ll
+++ b/test/CodeGen/X86/x86-64-pic-2.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq f} %t1
-; RUN: not grep {callq f@PLT} %t1
+; RUN: grep "callq f" %t1
+; RUN: not grep "callq f@PLT" %t1
define void @g() {
entry:
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
index aa3c888ed600..ba933788a3a5 100644
--- a/test/CodeGen/X86/x86-64-pic-3.ll
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {callq f} %t1
-; RUN: not grep {callq f@PLT} %t1
+; RUN: grep "callq f" %t1
+; RUN: not grep "callq f@PLT" %t1
define void @g() {
entry:
diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll
index 90fc1194a33b..33b08c4b4b04 100644
--- a/test/CodeGen/X86/x86-64-pic-4.ll
+++ b/test/CodeGen/X86/x86-64-pic-4.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movq a@GOTPCREL(%rip),} %t1
+; RUN: grep "movq a@GOTPCREL(%rip)," %t1
@a = global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll
index 6369bde6943d..234bc0d2f4f1 100644
--- a/test/CodeGen/X86/x86-64-pic-5.ll
+++ b/test/CodeGen/X86/x86-64-pic-5.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movl a(%rip),} %t1
+; RUN: grep "movl a(%rip)," %t1
; RUN: not grep GOTPCREL %t1
@a = hidden global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
index 6e19ad35bcf4..ae5b5835928d 100644
--- a/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movl a(%rip),} %t1
+; RUN: grep "movl a(%rip)," %t1
; RUN: not grep GOTPCREL %t1
@a = internal global i32 0
diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll
index 4d98ee614026..de240a38d63a 100644
--- a/test/CodeGen/X86/x86-64-pic-7.ll
+++ b/test/CodeGen/X86/x86-64-pic-7.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {movq f@GOTPCREL(%rip),} %t1
+; RUN: grep "movq f@GOTPCREL(%rip)," %t1
define void ()* @g() nounwind {
entry:
diff --git a/test/CodeGen/X86/x86-64-pic-8.ll b/test/CodeGen/X86/x86-64-pic-8.ll
index d3b567c61076..db35c33623fe 100644
--- a/test/CodeGen/X86/x86-64-pic-8.ll
+++ b/test/CodeGen/X86/x86-64-pic-8.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {leaq f(%rip),} %t1
+; RUN: grep "leaq f(%rip)," %t1
; RUN: not grep GOTPCREL %t1
define void ()* @g() {
diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll
index 076103133fa9..6daea84e1a73 100644
--- a/test/CodeGen/X86/x86-64-pic-9.ll
+++ b/test/CodeGen/X86/x86-64-pic-9.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep {leaq f(%rip),} %t1
+; RUN: grep "leaq f(%rip)," %t1
; RUN: not grep GOTPCREL %t1
define void ()* @g() nounwind {
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index a2521b0a66db..8af782cd2f19 100644
--- a/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -875,37 +875,37 @@ define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
}
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
-define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
; CHECK-NOT: mov
; CHECK: vfrczss
- %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) ;
ret <4 x float> %res
}
-define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
; CHECK-NOT: mov
; CHECK: vfrczss
- %elem = load float* %a1
+ %elem = load float* %a0
%vec = insertelement <4 x float> undef, float %elem, i32 0
- %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
-define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
; CHECK-NOT: mov
; CHECK: vfrczsd
- %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) ;
ret <2 x double> %res
}
-define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
; CHECK-NOT: mov
; CHECK: vfrczsd
- %elem = load double* %a1
+ %elem = load double* %a0
%vec = insertelement <2 x double> undef, double %elem, i32 0
- %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
; CHECK: vfrczpd
@@ -967,3 +967,59 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
}
declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+define <16 x i8> @test_int_x86_xop_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK:vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK:vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index ddc4cab14a4c..996bfc40ee56 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
; Though it is undefined, we want xor undef,undef to produce zero.
define <4 x i32> @test1() nounwind {
@@ -31,7 +31,7 @@ entry:
; X64: test3:
; X64: notl
; X64: andl
-; X64: shrl %eax
+; X64: shrl
; X64: ret
; X32: test3:
diff --git a/test/CodeGen/XCore/mkmsk.ll b/test/CodeGen/XCore/mkmsk.ll
new file mode 100644
index 000000000000..377612b7d215
--- /dev/null
+++ b/test/CodeGen/XCore/mkmsk.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+define i32 @f(i32) nounwind {
+; CHECK: f:
+; CHECK: mkmsk r0, r0
+; CHECK-NEXT: retsp 0
+entry:
+ %1 = shl i32 1, %0
+ %2 = add i32 %1, -1
+ ret i32 %2
+}