aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/R600
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-01-18 16:17:27 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-01-18 16:17:27 +0000
commit67c32a98315f785a9ec9d531c1f571a0196c7463 (patch)
tree4abb9cbeecc7901726dd0b4a37369596c852e9ef /test/CodeGen/R600
parent9f61947910e6ab40de38e6b4034751ef1513200f (diff)
downloadsrc-67c32a98315f785a9ec9d531c1f571a0196c7463.tar.gz
src-67c32a98315f785a9ec9d531c1f571a0196c7463.zip
Vendor import of llvm RELEASE_360/rc1 tag r226102 (effectively, 3.6.0 RC1):vendor/llvm/llvm-release_360-r226102
Notes
Notes: svn path=/vendor/llvm/dist/; revision=277323 svn path=/vendor/llvm/llvm-release_360-r226102/; revision=277324; tag=vendor/llvm/llvm-release_360-r226102
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--test/CodeGen/R600/128bit-kernel-args.ll16
-rw-r--r--test/CodeGen/R600/32-bit-local-address-space.ll90
-rw-r--r--test/CodeGen/R600/64bit-kernel-args.ll10
-rw-r--r--test/CodeGen/R600/add-debug.ll23
-rw-r--r--test/CodeGen/R600/add.ll98
-rw-r--r--test/CodeGen/R600/add_i64.ll54
-rw-r--r--test/CodeGen/R600/address-space.ll12
-rw-r--r--test/CodeGen/R600/and.ll97
-rw-r--r--test/CodeGen/R600/anyext.ll6
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i32.ll14
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i64.ll13
-rw-r--r--test/CodeGen/R600/atomic_cmp_swap_local.ll88
-rw-r--r--test/CodeGen/R600/atomic_load_add.ll18
-rw-r--r--test/CodeGen/R600/atomic_load_sub.ll18
-rw-r--r--test/CodeGen/R600/basic-branch.ll4
-rw-r--r--test/CodeGen/R600/basic-loop.ll4
-rw-r--r--test/CodeGen/R600/bfe_uint.ll4
-rw-r--r--test/CodeGen/R600/bfi_int.ll16
-rw-r--r--test/CodeGen/R600/big_alu.ll1
-rw-r--r--test/CodeGen/R600/bitcast.ll18
-rw-r--r--test/CodeGen/R600/bswap.ll66
-rw-r--r--test/CodeGen/R600/build_vector.ll26
-rw-r--r--test/CodeGen/R600/call.ll4
-rw-r--r--test/CodeGen/R600/call_fs.ll4
-rw-r--r--test/CodeGen/R600/cayman-loop-bug.ll2
-rw-r--r--test/CodeGen/R600/cf-stack-bug.ll8
-rw-r--r--test/CodeGen/R600/codegen-prepare-addrmode-sext.ll11
-rw-r--r--test/CodeGen/R600/combine_vloads.ll2
-rw-r--r--test/CodeGen/R600/commute_modifiers.ll181
-rw-r--r--test/CodeGen/R600/complex-folding.ll2
-rw-r--r--test/CodeGen/R600/concat_vectors.ll161
-rw-r--r--test/CodeGen/R600/copy-illegal-type.ll180
-rw-r--r--test/CodeGen/R600/copy-to-reg.ll26
-rw-r--r--test/CodeGen/R600/ctlz_zero_undef.ll52
-rw-r--r--test/CodeGen/R600/ctpop.ll231
-rw-r--r--test/CodeGen/R600/ctpop64.ll111
-rw-r--r--test/CodeGen/R600/cttz-ctlz.ll224
-rw-r--r--test/CodeGen/R600/cttz_zero_undef.ll52
-rw-r--r--test/CodeGen/R600/cvt_f32_ubyte.ll188
-rw-r--r--test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll4
-rw-r--r--test/CodeGen/R600/default-fp-mode.ll16
-rw-r--r--test/CodeGen/R600/disconnected-predset-break-bug.ll2
-rw-r--r--test/CodeGen/R600/dot4-folding.ll2
-rw-r--r--test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll69
-rw-r--r--test/CodeGen/R600/ds_read2.ll515
-rw-r--r--test/CodeGen/R600/ds_read2_offset_order.ll44
-rw-r--r--test/CodeGen/R600/ds_read2st64.ll272
-rw-r--r--test/CodeGen/R600/ds_write2.ll425
-rw-r--r--test/CodeGen/R600/ds_write2st64.ll119
-rw-r--r--test/CodeGen/R600/elf.ll10
-rw-r--r--test/CodeGen/R600/empty-function.ll20
-rw-r--r--test/CodeGen/R600/extload.ll79
-rw-r--r--test/CodeGen/R600/extract_vector_elt_i16.ll22
-rw-r--r--test/CodeGen/R600/fabs.f64.ll97
-rw-r--r--test/CodeGen/R600/fabs.ll123
-rw-r--r--test/CodeGen/R600/fadd.ll93
-rw-r--r--test/CodeGen/R600/fadd64.ll6
-rw-r--r--test/CodeGen/R600/fceil.ll82
-rw-r--r--test/CodeGen/R600/fceil64.ll121
-rw-r--r--test/CodeGen/R600/fcmp.ll4
-rw-r--r--test/CodeGen/R600/fcmp64.ll56
-rw-r--r--test/CodeGen/R600/fconst64.ll8
-rw-r--r--test/CodeGen/R600/fcopysign.f32.ll28
-rw-r--r--test/CodeGen/R600/fcopysign.f64.ll30
-rw-r--r--test/CodeGen/R600/fdiv.ll36
-rw-r--r--test/CodeGen/R600/fdiv64.ll8
-rw-r--r--test/CodeGen/R600/fetch-limits.r600.ll2
-rw-r--r--test/CodeGen/R600/fetch-limits.r700+.ll2
-rw-r--r--test/CodeGen/R600/ffloor.ll121
-rw-r--r--test/CodeGen/R600/flat-address-space.ll182
-rw-r--r--test/CodeGen/R600/fma.f64.ll46
-rw-r--r--test/CodeGen/R600/fma.ll137
-rw-r--r--test/CodeGen/R600/fmax3.ll38
-rw-r--r--test/CodeGen/R600/fmax_legacy.f64.ll67
-rw-r--r--test/CodeGen/R600/fmax_legacy.ll116
-rw-r--r--test/CodeGen/R600/fmaxnum.f64.ll75
-rw-r--r--test/CodeGen/R600/fmaxnum.ll191
-rw-r--r--test/CodeGen/R600/fmin3.ll38
-rw-r--r--test/CodeGen/R600/fmin_legacy.f64.ll77
-rw-r--r--test/CodeGen/R600/fmin_legacy.ll123
-rw-r--r--test/CodeGen/R600/fminnum.f64.ll75
-rw-r--r--test/CodeGen/R600/fminnum.ll189
-rw-r--r--test/CodeGen/R600/fmul.ll70
-rw-r--r--test/CodeGen/R600/fmul64.ll33
-rw-r--r--test/CodeGen/R600/fmuladd.ll184
-rw-r--r--test/CodeGen/R600/fnearbyint.ll4
-rw-r--r--test/CodeGen/R600/fneg-fabs.f64.ll101
-rw-r--r--test/CodeGen/R600/fneg-fabs.ll162
-rw-r--r--test/CodeGen/R600/fneg.f64.ll59
-rw-r--r--test/CodeGen/R600/fneg.ll102
-rw-r--r--test/CodeGen/R600/fp-classify.ll130
-rw-r--r--test/CodeGen/R600/fp16_to_fp.ll20
-rw-r--r--test/CodeGen/R600/fp32_to_fp16.ll10
-rw-r--r--test/CodeGen/R600/fp64_to_sint.ll9
-rw-r--r--test/CodeGen/R600/fp_to_sint.f64.ll56
-rw-r--r--test/CodeGen/R600/fp_to_sint.ll68
-rw-r--r--test/CodeGen/R600/fp_to_uint.f64.ll67
-rw-r--r--test/CodeGen/R600/fp_to_uint.ll83
-rw-r--r--test/CodeGen/R600/fpext.ll6
-rw-r--r--test/CodeGen/R600/fptrunc.ll6
-rw-r--r--test/CodeGen/R600/frem.ll103
-rw-r--r--test/CodeGen/R600/fsqrt.ll13
-rw-r--r--test/CodeGen/R600/fsub.ll93
-rw-r--r--test/CodeGen/R600/fsub64.ll6
-rw-r--r--test/CodeGen/R600/ftrunc.f64.ll110
-rw-r--r--test/CodeGen/R600/ftrunc.ll82
-rw-r--r--test/CodeGen/R600/gep-address-space.ll34
-rw-r--r--test/CodeGen/R600/global-directive.ll14
-rw-r--r--test/CodeGen/R600/global-extload-i1.ll301
-rw-r--r--test/CodeGen/R600/global-extload-i16.ll301
-rw-r--r--test/CodeGen/R600/global-extload-i32.ll456
-rw-r--r--test/CodeGen/R600/global-extload-i8.ll298
-rw-r--r--test/CodeGen/R600/global-zero-initializer.ll12
-rw-r--r--test/CodeGen/R600/global_atomics.ll801
-rw-r--r--test/CodeGen/R600/gv-const-addrspace-fail.ll23
-rw-r--r--test/CodeGen/R600/gv-const-addrspace.ll24
-rw-r--r--test/CodeGen/R600/half.ll34
-rw-r--r--test/CodeGen/R600/hsa.ll12
-rw-r--r--test/CodeGen/R600/i1-copy-implicit-def.ll21
-rw-r--r--test/CodeGen/R600/i1-copy-phi.ll29
-rw-r--r--test/CodeGen/R600/icmp64.ll42
-rw-r--r--test/CodeGen/R600/imm.ll479
-rw-r--r--test/CodeGen/R600/indirect-addressing-si.ll18
-rw-r--r--test/CodeGen/R600/indirect-private-64.ll68
-rw-r--r--test/CodeGen/R600/infinite-loop.ll12
-rw-r--r--test/CodeGen/R600/inline-asm.ll11
-rw-r--r--test/CodeGen/R600/inline-calls.ll24
-rw-r--r--test/CodeGen/R600/input-mods.ll4
-rw-r--r--test/CodeGen/R600/insert_subreg.ll15
-rw-r--r--test/CodeGen/R600/insert_vector_elt.ll152
-rw-r--r--test/CodeGen/R600/insert_vector_elt_f64.ll36
-rw-r--r--test/CodeGen/R600/kcache-fold.ll4
-rw-r--r--test/CodeGen/R600/kernel-args.ll286
-rw-r--r--test/CodeGen/R600/large-alloca.ll2
-rw-r--r--test/CodeGen/R600/large-constant-initializer.ll4
-rw-r--r--test/CodeGen/R600/lds-initializer.ll12
-rw-r--r--test/CodeGen/R600/lds-oqap-crash.ll2
-rw-r--r--test/CodeGen/R600/lds-output-queue.ll6
-rw-r--r--test/CodeGen/R600/lds-size.ll4
-rw-r--r--test/CodeGen/R600/lds-zero-initializer.ll12
-rw-r--r--test/CodeGen/R600/legalizedag-bug-expand-setcc.ll2
-rw-r--r--test/CodeGen/R600/literals.ll8
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.abs.ll26
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll8
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll9
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll335
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll432
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfi.ll18
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfm.ll18
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.brev.ll24
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.clamp.ll58
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.class.ll497
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.cube.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll18
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll24
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll36
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_scale.ll273
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.fract.ll10
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imad24.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imax.ll12
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imin.ll12
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imul24.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.kill.ll10
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.ldexp.ll22
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll16
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rcp.ll58
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rsq.ll25
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll24
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.trunc.ll8
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umad24.ll25
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umax.ll22
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umin.ll22
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umul24.ll6
-rw-r--r--test/CodeGen/R600/llvm.SI.fs.interp.constant.ll6
-rw-r--r--test/CodeGen/R600/llvm.SI.gather4.ll142
-rw-r--r--test/CodeGen/R600/llvm.SI.getlod.ll14
-rw-r--r--test/CodeGen/R600/llvm.SI.image.ll14
-rw-r--r--test/CodeGen/R600/llvm.SI.image.sample.ll82
-rw-r--r--test/CodeGen/R600/llvm.SI.image.sample.o.ll82
-rw-r--r--test/CodeGen/R600/llvm.SI.imageload.ll30
-rw-r--r--test/CodeGen/R600/llvm.SI.load.dword.ll14
-rw-r--r--test/CodeGen/R600/llvm.SI.resinfo.ll34
-rw-r--r--test/CodeGen/R600/llvm.SI.sample-masked.ll30
-rw-r--r--test/CodeGen/R600/llvm.SI.sample.ll38
-rw-r--r--test/CodeGen/R600/llvm.SI.sampled.ll34
-rw-r--r--test/CodeGen/R600/llvm.SI.sendmsg.ll12
-rw-r--r--test/CodeGen/R600/llvm.SI.tbuffer.store.ll18
-rw-r--r--test/CodeGen/R600/llvm.SI.tid.ll6
-rw-r--r--test/CodeGen/R600/llvm.amdgpu.kilp.ll8
-rw-r--r--test/CodeGen/R600/llvm.amdgpu.lrp.ll8
-rw-r--r--test/CodeGen/R600/llvm.cos.ll16
-rw-r--r--test/CodeGen/R600/llvm.exp2.ll22
-rw-r--r--test/CodeGen/R600/llvm.floor.ll28
-rw-r--r--test/CodeGen/R600/llvm.log2.ll22
-rw-r--r--test/CodeGen/R600/llvm.memcpy.ll364
-rw-r--r--test/CodeGen/R600/llvm.rint.f64.ll36
-rw-r--r--test/CodeGen/R600/llvm.rint.ll26
-rw-r--r--test/CodeGen/R600/llvm.round.ll16
-rw-r--r--test/CodeGen/R600/llvm.sin.ll111
-rw-r--r--test/CodeGen/R600/llvm.sqrt.ll26
-rw-r--r--test/CodeGen/R600/llvm.trunc.ll2
-rw-r--r--test/CodeGen/R600/load-i1.ll127
-rw-r--r--test/CodeGen/R600/load-input-fold.ll1
-rw-r--r--test/CodeGen/R600/load.ll372
-rw-r--r--test/CodeGen/R600/load.vec.ll14
-rw-r--r--test/CodeGen/R600/load64.ll20
-rw-r--r--test/CodeGen/R600/local-64.ll130
-rw-r--r--test/CodeGen/R600/local-atomics.ll478
-rw-r--r--test/CodeGen/R600/local-atomics64.ll392
-rw-r--r--test/CodeGen/R600/local-memory-two-objects.ll21
-rw-r--r--test/CodeGen/R600/local-memory.ll40
-rw-r--r--test/CodeGen/R600/loop-address.ll8
-rw-r--r--test/CodeGen/R600/loop-idiom.ll14
-rw-r--r--test/CodeGen/R600/lshl.ll4
-rw-r--r--test/CodeGen/R600/lshr.ll4
-rw-r--r--test/CodeGen/R600/m0-spill.ll34
-rw-r--r--test/CodeGen/R600/mad-sub.ll215
-rw-r--r--test/CodeGen/R600/mad_int24.ll19
-rw-r--r--test/CodeGen/R600/mad_uint24.ll22
-rw-r--r--test/CodeGen/R600/max-literals.ll4
-rw-r--r--test/CodeGen/R600/max.ll99
-rw-r--r--test/CodeGen/R600/max3.ll41
-rw-r--r--test/CodeGen/R600/min.ll120
-rw-r--r--test/CodeGen/R600/min3.ll111
-rw-r--r--test/CodeGen/R600/missing-store.ll26
-rw-r--r--test/CodeGen/R600/mubuf.ll107
-rw-r--r--test/CodeGen/R600/mul.ll166
-rw-r--r--test/CodeGen/R600/mul_int24.ll8
-rw-r--r--test/CodeGen/R600/mul_uint24.ll24
-rw-r--r--test/CodeGen/R600/mulhu.ll8
-rw-r--r--test/CodeGen/R600/no-initializer-constant-addrspace.ll6
-rw-r--r--test/CodeGen/R600/no-shrink-extloads.ll191
-rw-r--r--test/CodeGen/R600/operand-folding.ll113
-rw-r--r--test/CodeGen/R600/operand-spacing.ll15
-rw-r--r--test/CodeGen/R600/or.ll116
-rw-r--r--test/CodeGen/R600/packetizer.ll2
-rw-r--r--test/CodeGen/R600/parallelandifcollapse.ll3
-rw-r--r--test/CodeGen/R600/predicate-dp4.ll2
-rw-r--r--test/CodeGen/R600/predicates.ll8
-rw-r--r--test/CodeGen/R600/private-memory-atomics.ll2
-rw-r--r--test/CodeGen/R600/private-memory-broken.ll2
-rw-r--r--test/CodeGen/R600/private-memory.ll70
-rw-r--r--test/CodeGen/R600/pv.ll4
-rw-r--r--test/CodeGen/R600/r600-encoding.ll4
-rw-r--r--test/CodeGen/R600/r600-export-fix.ll4
-rw-r--r--test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll1
-rw-r--r--test/CodeGen/R600/r600cfg.ll1
-rw-r--r--test/CodeGen/R600/register-count-comments.ll11
-rw-r--r--test/CodeGen/R600/reorder-stores.ll116
-rw-r--r--test/CodeGen/R600/rotl.i64.ll26
-rw-r--r--test/CodeGen/R600/rotl.ll40
-rw-r--r--test/CodeGen/R600/rotr.i64.ll28
-rw-r--r--test/CodeGen/R600/rotr.ll22
-rw-r--r--test/CodeGen/R600/rsq.ll64
-rw-r--r--test/CodeGen/R600/s_movk_i32.ll184
-rw-r--r--test/CodeGen/R600/saddo.ll16
-rw-r--r--test/CodeGen/R600/salu-to-valu.ll50
-rw-r--r--test/CodeGen/R600/scalar_to_vector.ll34
-rw-r--r--test/CodeGen/R600/schedule-global-loads.ll41
-rw-r--r--test/CodeGen/R600/schedule-kernel-arg-loads.ll12
-rw-r--r--test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll4
-rw-r--r--test/CodeGen/R600/sdiv.ll26
-rw-r--r--test/CodeGen/R600/sdivrem24.ll238
-rw-r--r--test/CodeGen/R600/select-i1.ll8
-rw-r--r--test/CodeGen/R600/select-vectors.ll148
-rw-r--r--test/CodeGen/R600/select.ll2
-rw-r--r--test/CodeGen/R600/select64.ll28
-rw-r--r--test/CodeGen/R600/selectcc-opt.ll18
-rw-r--r--test/CodeGen/R600/selectcc.ll10
-rw-r--r--test/CodeGen/R600/set-dx10.ll24
-rw-r--r--test/CodeGen/R600/setcc-equivalent.ll7
-rw-r--r--test/CodeGen/R600/setcc-opt.ll234
-rw-r--r--test/CodeGen/R600/setcc.ll184
-rw-r--r--test/CodeGen/R600/setcc64.ll129
-rw-r--r--test/CodeGen/R600/seto.ll8
-rw-r--r--test/CodeGen/R600/setuo.ll8
-rw-r--r--test/CodeGen/R600/sext-eliminate.ll26
-rw-r--r--test/CodeGen/R600/sext-in-reg.ll383
-rw-r--r--test/CodeGen/R600/sgpr-control-flow.ll84
-rw-r--r--test/CodeGen/R600/sgpr-copy-duplicate-operand.ll6
-rw-r--r--test/CodeGen/R600/sgpr-copy.ll91
-rw-r--r--test/CodeGen/R600/shared-op-cycle.ll2
-rw-r--r--test/CodeGen/R600/shl.ll48
-rw-r--r--test/CodeGen/R600/shl_add_constant.ll90
-rw-r--r--test/CodeGen/R600/shl_add_ptr.ll283
-rw-r--r--test/CodeGen/R600/si-annotate-cf-assertion.ll4
-rw-r--r--test/CodeGen/R600/si-lod-bias.ll10
-rw-r--r--test/CodeGen/R600/si-sgpr-spill.ll14
-rw-r--r--test/CodeGen/R600/si-triv-disjoint-mem-access.ll238
-rw-r--r--test/CodeGen/R600/si-vector-hang.ll38
-rw-r--r--test/CodeGen/R600/sign_extend.ll40
-rw-r--r--test/CodeGen/R600/simplify-demanded-bits-build-pair.ll10
-rw-r--r--test/CodeGen/R600/sint_to_fp.f64.ll60
-rw-r--r--test/CodeGen/R600/sint_to_fp.ll66
-rw-r--r--test/CodeGen/R600/sint_to_fp64.ll35
-rw-r--r--test/CodeGen/R600/smrd.ll53
-rw-r--r--test/CodeGen/R600/split-scalar-i64-add.ll48
-rw-r--r--test/CodeGen/R600/sra.ll54
-rw-r--r--test/CodeGen/R600/srem.ll2
-rw-r--r--test/CodeGen/R600/srl.ll260
-rw-r--r--test/CodeGen/R600/ssubo.ll20
-rw-r--r--test/CodeGen/R600/store-barrier.ll42
-rw-r--r--test/CodeGen/R600/store-v3i32.ll6
-rw-r--r--test/CodeGen/R600/store-v3i64.ll14
-rw-r--r--test/CodeGen/R600/store-vector-ptrs.ll10
-rw-r--r--test/CodeGen/R600/store.ll215
-rw-r--r--test/CodeGen/R600/store.r600.ll4
-rw-r--r--test/CodeGen/R600/structurize.ll2
-rw-r--r--test/CodeGen/R600/structurize1.ll2
-rw-r--r--test/CodeGen/R600/sub.ll99
-rw-r--r--test/CodeGen/R600/subreg-coalescer-crash.ll45
-rw-r--r--test/CodeGen/R600/swizzle-export.ll6
-rw-r--r--test/CodeGen/R600/trunc-cmp-constant.ll169
-rw-r--r--test/CodeGen/R600/trunc-store-i1.ll26
-rw-r--r--test/CodeGen/R600/trunc-vector-store-assertion-failure.ll2
-rw-r--r--test/CodeGen/R600/trunc.ll56
-rw-r--r--test/CodeGen/R600/uaddo.ll30
-rw-r--r--test/CodeGen/R600/udiv.ll16
-rw-r--r--test/CodeGen/R600/udivrem.ll336
-rw-r--r--test/CodeGen/R600/udivrem24.ll244
-rw-r--r--test/CodeGen/R600/udivrem64.ll10
-rw-r--r--test/CodeGen/R600/uint_to_fp.f64.ll101
-rw-r--r--test/CodeGen/R600/uint_to_fp.ll71
-rw-r--r--test/CodeGen/R600/unaligned-load-store.ll105
-rw-r--r--test/CodeGen/R600/unhandled-loop-condition-assertion.ll12
-rw-r--r--test/CodeGen/R600/unsupported-cc.ll20
-rw-r--r--test/CodeGen/R600/urecip.ll4
-rw-r--r--test/CodeGen/R600/urem.ll84
-rw-r--r--test/CodeGen/R600/use-sgpr-multiple-times.ll96
-rw-r--r--test/CodeGen/R600/usubo.ll24
-rw-r--r--test/CodeGen/R600/v1i64-kernel-arg.ll4
-rw-r--r--test/CodeGen/R600/v_cndmask.ll42
-rw-r--r--test/CodeGen/R600/valu-i1.ll158
-rw-r--r--test/CodeGen/R600/vector-alloca.ll10
-rw-r--r--test/CodeGen/R600/vertex-fetch-encoding.ll6
-rw-r--r--test/CodeGen/R600/vop-shrink.ll27
-rw-r--r--test/CodeGen/R600/vselect.ll32
-rw-r--r--test/CodeGen/R600/vselect64.ll2
-rw-r--r--test/CodeGen/R600/vtx-fetch-branch.ll2
-rw-r--r--test/CodeGen/R600/vtx-schedule.ll2
-rw-r--r--test/CodeGen/R600/wait.ll60
-rw-r--r--test/CodeGen/R600/work-item-intrinsics.ll179
-rw-r--r--test/CodeGen/R600/wrong-transalu-pos-fix.ll8
-rw-r--r--test/CodeGen/R600/xor.ll108
-rw-r--r--test/CodeGen/R600/zero_extend.ll24
348 files changed, 18397 insertions, 5342 deletions
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
index 3c4fcf740c39..3b3fee05af7f 100644
--- a/test/CodeGen/R600/128bit-kernel-args.ll
+++ b/test/CodeGen/R600/128bit-kernel-args.ll
@@ -1,27 +1,27 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; R600-CHECK: @v4i32_kernel_arg
+; R600-CHECK: {{^}}v4i32_kernel_arg:
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI-CHECK: @v4i32_kernel_arg
-; SI-CHECK: BUFFER_STORE_DWORDX4
+; SI-CHECK: {{^}}v4i32_kernel_arg:
+; SI-CHECK: buffer_store_dwordx4
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
ret void
}
-; R600-CHECK: @v4f32_kernel_arg
+; R600-CHECK: {{^}}v4f32_kernel_arg:
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI-CHECK: @v4f32_kernel_arg
-; SI-CHECK: BUFFER_STORE_DWORDX4
-define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+; SI-CHECK: {{^}}v4f32_kernel_arg:
+; SI-CHECK: buffer_store_dwordx4
+define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
index 7dec42637421..6ab0c08d505f 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
; the global address space(1) uses 64-bit pointers. These tests check to make sure
@@ -9,9 +9,9 @@
; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
; instructions with B64, U64, and I64 take 64-bit operands.
-; CHECK-LABEL: @local_address_load
-; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
+; FUNC-LABEL: {{^}}local_address_load:
+; SI: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
+; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = load i32 addrspace(3)* %in
@@ -19,10 +19,10 @@ entry:
ret void
}
-; CHECK-LABEL: @local_address_gep
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_READ_B32 [[VPTR]]
+; FUNC-LABEL: {{^}}local_address_gep:
+; SI: s_add_i32 [[SPTR:s[0-9]]]
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_read_b32 [[VPTR]]
define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 %offset
@@ -31,9 +31,9 @@ entry:
ret void
}
-; CHECK-LABEL: @local_address_gep_const_offset
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
+; FUNC-LABEL: {{^}}local_address_gep_const_offset:
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 1
@@ -43,10 +43,10 @@ entry:
}
; Offset too large, can't fold into 16-bit immediate offset.
-; CHECK-LABEL: @local_address_gep_large_const_offset
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_READ_B32 [[VPTR]]
+; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
+; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_read_b32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 16385
@@ -55,10 +55,10 @@ entry:
ret void
}
-; CHECK-LABEL: @null_32bit_lds_ptr:
-; CHECK: V_CMP_NE_I32
-; CHECK-NOT: V_CMP_NE_I32
-; CHECK: V_CNDMASK_B32
+; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
+; SI: v_cmp_ne_i32
+; SI-NOT: v_cmp_ne_i32
+; SI: v_cndmask_b32
define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
%cmp = icmp ne i32 addrspace(3)* %lds, null
%x = select i1 %cmp, i32 123, i32 456
@@ -66,10 +66,10 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
ret void
}
-; CHECK-LABEL: @mul_32bit_ptr:
-; CHECK: V_MUL_LO_I32
-; CHECK-NEXT: V_ADD_I32_e32
-; CHECK-NEXT: DS_READ_B32
+; FUNC-LABEL: {{^}}mul_32bit_ptr:
+; SI: s_mul_i32
+; SI-NEXT: s_add_i32
+; SI: ds_read_b32
define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
%ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
%val = load float addrspace(3)* %ptr
@@ -77,11 +77,11 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
ret void
}
-@g_lds = addrspace(3) global float zeroinitializer, align 4
+@g_lds = addrspace(3) global float undef, align 4
-; CHECK-LABEL: @infer_ptr_alignment_global_offset:
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]]
+; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: ds_read_b32 v{{[0-9]+}}, [[REG]]
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
%val = load float addrspace(3)* @g_lds
store float %val, float addrspace(1)* %out
@@ -89,37 +89,37 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
}
-@ptr = addrspace(3) global i32 addrspace(3)* null
-@dst = addrspace(3) global [16384 x i32] zeroinitializer
+@ptr = addrspace(3) global i32 addrspace(3)* undef
+@dst = addrspace(3) global [16384 x i32] undef
-; CHECK-LABEL: @global_ptr:
-; CHECK: DS_WRITE_B32
+; FUNC-LABEL: {{^}}global_ptr:
+; SI: ds_write_b32
define void @global_ptr() nounwind {
store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
ret void
}
-; CHECK-LABEL: @local_address_store
-; CHECK: DS_WRITE_B32
+; FUNC-LABEL: {{^}}local_address_store:
+; SI: ds_write_b32
define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
store i32 %val, i32 addrspace(3)* %out
ret void
}
-; CHECK-LABEL: @local_address_gep_store
-; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
-; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
-; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
+; FUNC-LABEL: {{^}}local_address_gep_store:
+; SI: s_add_i32 [[SADDR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
+; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
%gep = getelementptr i32 addrspace(3)* %out, i32 %offset
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
-; CHECK-LABEL: @local_address_gep_const_offset_store
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
+; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
+; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
@@ -127,10 +127,10 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
}
; Offset too large, can't fold into 16-bit immediate offset.
-; CHECK-LABEL: @local_address_gep_large_const_offset_store
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
+; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
+; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_write_b32 [[VPTR]], v{{[0-9]+}} [M0]{{$}}
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 16385
store i32 %val, i32 addrspace(3)* %gep, align 4
diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll
index 2d82c1e53919..02b6f34e9419 100644
--- a/test/CodeGen/R600/64bit-kernel-args.ll
+++ b/test/CodeGen/R600/64bit-kernel-args.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; SI-CHECK: @f64_kernel_arg
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK: {{^}}f64_kernel_arg:
+; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
+; SI-CHECK: buffer_store_dwordx2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
store double %in, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/R600/add-debug.ll
new file mode 100644
index 000000000000..85e9451d4a9b
--- /dev/null
+++ b/test/CodeGen/R600/add-debug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -debug
+; REQUIRES: asserts
+
+; Check that SelectionDAGDumper does not crash on int_SI_if.
+define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
+entry:
+ %0 = icmp eq i64 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = load i64 addrspace(1)* %in
+ br label %endif
+
+else:
+ %2 = add i64 %a, %b
+ br label %endif
+
+endif:
+ %3 = phi i64 [%1, %if], [%2, %else]
+ store i64 %3, i64 addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index 711a2bc41774..d95853a61048 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -1,12 +1,12 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
-;FUNC-LABEL: @test1:
+;FUNC-LABEL: {{^}}test1:
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
;SI-CHECK-NOT: [[REG]]
-;SI-CHECK: BUFFER_STORE_DWORD [[REG]],
+;SI-CHECK: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%a = load i32 addrspace(1)* %in
@@ -16,12 +16,12 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
ret void
}
-;FUNC-LABEL: @test2:
+;FUNC-LABEL: {{^}}test2:
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -32,16 +32,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;FUNC-LABEL: @test4:
+;FUNC-LABEL: {{^}}test4:
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -52,7 +52,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @test8
+; FUNC-LABEL: {{^}}test8:
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
@@ -61,14 +61,14 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
%0 = add <8 x i32> %a, %b
@@ -76,7 +76,7 @@ entry:
ret void
}
-; FUNC-LABEL: @test16
+; FUNC-LABEL: {{^}}test16:
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
@@ -93,22 +93,22 @@ entry:
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
; EG-CHECK: ADD_INT
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
%0 = add <16 x i32> %a, %b
@@ -116,9 +116,9 @@ entry:
ret void
}
-; FUNC-LABEL: @add64
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADDC_U32
+; FUNC-LABEL: {{^}}add64:
+; SI-CHECK: s_add_u32
+; SI-CHECK: s_addc_u32
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = add i64 %a, %b
@@ -126,13 +126,13 @@ entry:
ret void
}
-; The V_ADDC_U32 and V_ADD_I32 instruction can't read SGPRs, because they
+; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they
; use VCC. The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the comiler will be forced to copy %a
; to a VGPR before doing the add.
-; FUNC-LABEL: @add64_sgpr_vgpr
-; SI-CHECK-NOT: V_ADDC_U32_e32 s
+; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
+; SI-CHECK-NOT: v_addc_u32_e32 s
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
%0 = load i64 addrspace(1)* %in
@@ -141,12 +141,10 @@ entry:
ret void
}
-; Test i64 add inside a branch. We don't allow SALU instructions inside of
-; branches.
-; FIXME: We are being conservative here. We could allow this in some cases.
-; FUNC-LABEL: @add64_in_branch
-; SI-CHECK-NOT: S_ADD_I32
-; SI-CHECK-NOT: S_ADDC_U32
+; Test i64 add inside a branch.
+; FUNC-LABEL: {{^}}add64_in_branch:
+; SI-CHECK: s_add_u32
+; SI-CHECK: s_addc_u32
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index f733d9040421..1769409f5ef1 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() readnone
-; SI-LABEL: @test_i64_vreg:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}test_i64_vreg:
+; SI: v_add_i32
+; SI: v_addc_u32
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
@@ -18,9 +18,9 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
}
; Check that the SGPR add operand is correctly moved to a VGPR.
-; SI-LABEL: @sgpr_operand:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}sgpr_operand:
+; SI: v_add_i32
+; SI: v_addc_u32
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
%foo = load i64 addrspace(1)* %in, align 8
%result = add i64 %foo, %a
@@ -31,9 +31,9 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
; Swap the arguments. Check that the SGPR -> VGPR copy works with the
; SGPR as other operand.
;
-; SI-LABEL: @sgpr_operand_reversed:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}sgpr_operand_reversed:
+; SI: v_add_i32
+; SI: v_addc_u32
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
%foo = load i64 addrspace(1)* %in, align 8
%result = add i64 %a, %foo
@@ -42,22 +42,22 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace
}
-; SI-LABEL: @test_v2i64_sreg:
-; SI: S_ADD_I32
-; SI: S_ADDC_U32
-; SI: S_ADD_I32
-; SI: S_ADDC_U32
+; SI-LABEL: {{^}}test_v2i64_sreg:
+; SI: s_add_u32
+; SI: s_addc_u32
+; SI: s_add_u32
+; SI: s_addc_u32
define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
%result = add <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
}
-; SI-LABEL: @test_v2i64_vreg:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}test_v2i64_vreg:
+; SI: v_add_i32
+; SI: v_addc_u32
+; SI: v_add_i32
+; SI: v_addc_u32
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
@@ -69,13 +69,13 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
ret void
}
-; SI-LABEL: @trunc_i64_add_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
-; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI-NOT: ADDC
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI-LABEL: {{^}}trunc_i64_add_to_i32:
+; SI: s_load_dword s[[SREG0:[0-9]+]]
+; SI: s_load_dword s[[SREG1:[0-9]+]]
+; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI-NOT: addc
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = add i64 %b, %a
%trunc = trunc i64 %add to i32
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
index f75a8ac5e6a5..1106f4f99623 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/R600/address-space.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
; Test that codegenprepare understands address space sizes
@@ -7,11 +7,11 @@
; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is
; already in a VGPR after the first read.
-; CHECK-LABEL: @do_as_ptr_calcs:
-; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
-; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0x14
-; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0xc
+; CHECK-LABEL: {{^}}do_as_ptr_calcs:
+; CHECK: s_load_dword [[SREG1:s[0-9]+]],
+; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll
index e20037e6bb67..bfdf8734eabb 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -1,12 +1,12 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @test2
+; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -17,16 +17,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @test4
+; FUNC-LABEL: {{^}}test4:
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -37,24 +37,24 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @s_and_i32
-; SI: S_AND_B32
+; FUNC-LABEL: {{^}}s_and_i32:
+; SI: s_and_b32
define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%and = and i32 %a, %b
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @s_and_constant_i32
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
+; FUNC-LABEL: {{^}}s_and_constant_i32:
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @v_and_i32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_i32:
+; SI: v_and_b32
define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
@@ -63,8 +63,8 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr
ret void
}
-; FUNC-LABEL: @v_and_constant_i32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_constant_i32:
+; SI: v_and_b32
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%a = load i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 1234567
@@ -72,8 +72,8 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
ret void
}
-; FUNC-LABEL: @s_and_i64
-; SI: S_AND_B64
+; FUNC-LABEL: {{^}}s_and_i64:
+; SI: s_and_b64
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%and = and i64 %a, %b
store i64 %and, i64 addrspace(1)* %out, align 8
@@ -81,25 +81,25 @@ define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
}
; FIXME: Should use SGPRs
-; FUNC-LABEL: @s_and_i1
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}s_and_i1:
+; SI: v_and_b32
define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
%and = and i1 %a, %b
store i1 %and, i1 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @s_and_constant_i64
-; SI: S_AND_B64
+; FUNC-LABEL: {{^}}s_and_constant_i64:
+; SI: s_and_b64
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
%and = and i64 %a, 281474976710655
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @v_and_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_i64:
+; SI: v_and_b32
+; SI: v_and_b32
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%b = load i64 addrspace(1)* %bptr, align 8
@@ -108,12 +108,51 @@ define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addr
ret void
}
-; FUNC-LABEL: @v_and_constant_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_i64_br:
+; SI: v_and_b32
+; SI: v_and_b32
+define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i32 %cond) {
+entry:
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %if, label %endif
+
+if:
+ %a = load i64 addrspace(1)* %aptr, align 8
+ %b = load i64 addrspace(1)* %bptr, align 8
+ %and = and i64 %a, %b
+ br label %endif
+
+endif:
+ %tmp1 = phi i64 [%and, %if], [0, %entry]
+ store i64 %tmp1, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_and_constant_i64:
+; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
+
+; FIXME: Replace and 0 with mov 0
+; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
+; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+ %a = load i64 addrspace(1)* %aptr, align 8
+ %and = and i64 %a, 64
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 64
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/anyext.ll b/test/CodeGen/R600/anyext.ll
index bbe5d0a393e6..8336ebcaca3b 100644
--- a/test/CodeGen/R600/anyext.ll
+++ b/test/CodeGen/R600/anyext.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-; CHECK-LABEL: @anyext_i1_i32
-; CHECK: V_CNDMASK_B32_e64
+; CHECK-LABEL: {{^}}anyext_i1_i32:
+; CHECK: v_cndmask_b32_e64
define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp eq i32 %cond, 0
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll
index a2b697823519..33a8aee0164d 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
@@ -9,21 +9,21 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; 64-bit pointer add. This should work since private pointers should
; be 32-bits.
-; SI-LABEL: @test_private_array_ptr_calc:
+; SI-LABEL: {{^}}test_private_array_ptr_calc:
; FIXME: We end up with zero argument for ADD, because
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset. We should fold this into the store.
-; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
-; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], [[PTRREG]]
+; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
-; SI-PROMOTE: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
-; SI-PROMOTE: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
+; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
+; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
%tid = call i32 @llvm.SI.tid() readnone
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll
index e254c5f64637..32e657db7bc6 100644
--- a/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -1,13 +1,13 @@
-; XFAIL: *
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() readnone
-
-; SI-LABEL: @test_array_ptr_calc(
-define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [16 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
+; SI-LABEL: {{^}}test_array_ptr_calc:
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_i32
+define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.SI.tid() readnone
- %a_ptr = getelementptr [16 x i32] addrspace(1)* %inA, i32 1, i32 %tid
+ %a_ptr = getelementptr [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
%b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid
%a = load i32 addrspace(1)* %a_ptr
%b = load i32 addrspace(1)* %b_ptr
@@ -15,4 +15,3 @@ define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [16 x i32] addr
store i32 %result, i32 addrspace(1)* %out
ret void
}
-
diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll
index eb9539eec516..0d5ece4b0e0b 100644
--- a/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -1,13 +1,14 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
-; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
-; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@@ -16,18 +17,17 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
ret void
}
-; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i64_offset:
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
-; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
@@ -35,3 +35,49 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
+
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
+; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; SI: s_endpgm
+define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
+ %sub = sub i32 %a, %b
+ %add = add i32 %sub, 4
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
+; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; SI: s_endpgm
+define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; SI: s_endpgm
+define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
+ %result = extractvalue { i64, i1 } %pair, 0
+ ret void
+}
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
index c26f9cd80eaf..6dd1c51afb4a 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -1,35 +1,35 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; FUNC-LABEL: @atomic_add_local
+; FUNC-LABEL: {{^}}atomic_add_local:
; R600: LDS_ADD *
-; SI: DS_ADD_RTN_U32
+; SI: ds_add_u32
define void @atomic_add_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
ret void
}
-; FUNC-LABEL: @atomic_add_local_const_offset
+; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
; R600: LDS_ADD *
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
}
-; FUNC-LABEL: @atomic_add_ret_local
+; FUNC-LABEL: {{^}}atomic_add_ret_local:
; R600: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32
+; SI: ds_add_rtn_u32
define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @atomic_add_ret_local_const_offset
+; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
; R600: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
index 3569d91e08dc..5d47185421a0 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -1,35 +1,35 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @atomic_sub_local
+; FUNC-LABEL: {{^}}atomic_sub_local:
; R600: LDS_SUB *
-; SI: DS_SUB_RTN_U32
+; SI: ds_sub_u32
define void @atomic_sub_local(i32 addrspace(3)* %local) {
%unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
ret void
}
-; FUNC-LABEL: @atomic_sub_local_const_offset
+; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
; R600: LDS_SUB *
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
}
-; FUNC-LABEL: @atomic_sub_ret_local
+; FUNC-LABEL: {{^}}atomic_sub_ret_local:
; R600: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32
+; SI: ds_sub_rtn_u32
define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @atomic_sub_ret_local_const_offset
+; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
; R600: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
%gep = getelementptr i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
diff --git a/test/CodeGen/R600/basic-branch.ll b/test/CodeGen/R600/basic-branch.ll
index d084132d4fcc..42ddddd2ed84 100644
--- a/test/CodeGen/R600/basic-branch.ll
+++ b/test/CodeGen/R600/basic-branch.ll
@@ -1,7 +1,7 @@
; XFAIL: *
-; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-; CHECK-LABEL: @test_branch(
+; CHECK-LABEL: {{^}}test_branch(
define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
%cmp = icmp ne i32 %val, 0
br i1 %cmp, label %store, label %end
diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll
index 6d0ff0743b85..72737ae273e6 100644
--- a/test/CodeGen/R600/basic-loop.ll
+++ b/test/CodeGen/R600/basic-loop.ll
@@ -1,7 +1,7 @@
; XFAIL: *
-; RUN: llc -O0 -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
-; CHECK-LABEL: @test_loop:
+; CHECK-LABEL: {{^}}test_loop:
define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
entry:
br label %loop.body
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll
index fe466e6ad5fd..6fe23e912952 100644
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/R600/bfe_uint.ll
@@ -2,7 +2,7 @@
; XFAIL: *
-; CHECK: @bfe_def
+; CHECK: {{^}}bfe_def:
; CHECK: BFE_UINT
define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
entry:
@@ -17,7 +17,7 @@ entry:
; implmented with a LSHR instruction, which is better, because LSHR has less
; operands and requires less constants.
-; CHECK: @bfe_shift
+; CHECK: {{^}}bfe_shift:
; CHECK-NOT: BFE_UINT
define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
entry:
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
index d18702a1de98..988a2f85e0ea 100644
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -1,13 +1,13 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
-; R600-CHECK: @bfi_def
+; R600-CHECK: {{^}}bfi_def:
; R600-CHECK: BFI_INT
; SI-CHECK: @bfi_def
-; SI-CHECK: V_BFI_B32
+; SI-CHECK: v_bfi_b32
define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %x, -1
@@ -20,10 +20,10 @@ entry:
; SHA-256 Ch function
; z ^ (x & (y ^ z))
-; R600-CHECK: @bfi_sha256_ch
+; R600-CHECK: {{^}}bfi_sha256_ch:
; R600-CHECK: BFI_INT
; SI-CHECK: @bfi_sha256_ch
-; SI-CHECK: V_BFI_B32
+; SI-CHECK: v_bfi_b32
define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %y, %z
@@ -35,11 +35,11 @@ entry:
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
-; R600-CHECK: @bfi_sha256_ma
+; R600-CHECK: {{^}}bfi_sha256_ma:
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
+; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
index 511e8ef62951..28be216e76f2 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -1,5 +1,4 @@
;RUN: llc < %s -march=r600 -mcpu=cedar
-;REQUIRES: asserts
;This test ensures that R600 backend can handle ifcvt properly
;and do not generate ALU clauses with more than 128 instructions.
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll
index 0be79e658f5c..3607d519f013 100644
--- a/test/CodeGen/R600/bitcast.ll
+++ b/test/CodeGen/R600/bitcast.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; This test just checks that the compiler doesn't crash.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-; FUNC-LABEL: @v32i8_to_v8i32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v32i8_to_v8i32:
+; SI: s_endpgm
define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
%1 = load <32 x i8> addrspace(2)* %0
@@ -17,8 +17,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i8ptr_v16i8ptr
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
+; SI: s_endpgm
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
@@ -55,8 +55,8 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
ret void
}
-; FUNC-LABEL: @bitcast_v2i32_to_f64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
+; SI: s_endpgm
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%val = load <2 x i32> addrspace(1)* %in, align 8
%add = add <2 x i32> %val, <i32 4, i32 9>
@@ -65,8 +65,8 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
ret void
}
-; FUNC-LABEL: @bitcast_f64_to_v2i32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
+; SI: s_endpgm
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
%val = load double addrspace(1)* %in, align 8
%add = fadd double %val, 4.0
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll
index 6aebe851366c..65998f5f1151 100644
--- a/test/CodeGen/R600/bswap.ll
+++ b/test/CodeGen/R600/bswap.ll
@@ -1,12 +1,21 @@
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
+; FUNC-LABEL: @test_bswap_i32
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
+; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
+; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff
+; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
@@ -14,6 +23,14 @@ define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
+; FUNC-LABEL: @test_bswap_v2i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
%val = load <2 x i32> addrspace(1)* %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
@@ -21,6 +38,20 @@ define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(
ret void
}
+; FUNC-LABEL: @test_bswap_v4i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
%val = load <4 x i32> addrspace(1)* %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
@@ -28,6 +59,39 @@ define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(
ret void
}
+; FUNC-LABEL: @test_bswap_v8i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
+define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
+ %val = load <8 x i32> addrspace(1)* %in, align 32
+ %bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
+ store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
+ ret void
+}
+
define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll
index 8179de13e869..a0ebe089bd5a 100644
--- a/test/CodeGen/R600/build_vector.ll
+++ b/test/CodeGen/R600/build_vector.ll
@@ -1,32 +1,32 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; R600-CHECK: @build_vector2
+; R600-CHECK: {{^}}build_vector2:
; R600-CHECK: MOV
; R600-CHECK: MOV
; R600-CHECK-NOT: MOV
-; SI-CHECK: @build_vector2
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}}
+; SI-CHECK: {{^}}build_vector2:
+; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
entry:
store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
ret void
}
-; R600-CHECK: @build_vector4
+; R600-CHECK: {{^}}build_vector4:
; R600-CHECK: MOV
; R600-CHECK: MOV
; R600-CHECK: MOV
; R600-CHECK: MOV
; R600-CHECK-NOT: MOV
-; SI-CHECK: @build_vector4
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]]], 7
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[W:[0-9]]], 8
-; SI-CHECK: BUFFER_STORE_DWORDX4 v{{\[}}[[X]]:[[W]]{{\]}}
+; SI-CHECK: {{^}}build_vector4:
+; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
+; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
+; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
entry:
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll
index d80347490b39..1afe98ba5951 100644
--- a/test/CodeGen/R600/call.ll
+++ b/test/CodeGen/R600/call.ll
@@ -1,7 +1,7 @@
-; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
+; RUN: not llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
-; CHECK: error: unsupported call to function defined_function in test_call
+; CHECK: error: unsupported call to function external_function in test_call_external
declare i32 @external_function(i32) nounwind
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll
index f7c4e5b22cb1..7df22402cff9 100644
--- a/test/CodeGen/R600/call_fs.ll
+++ b/test/CodeGen/R600/call_fs.ll
@@ -2,10 +2,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s
; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s
-; EG-CHECK: @call_fs
+; EG-CHECK: {{^}}call_fs:
; EG-CHECK: .long 257
; EG-CHECK: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
-; R600-CHECK: @call_fs
+; R600-CHECK: {{^}}call_fs:
; R600-CHECK: .long 257
; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
diff --git a/test/CodeGen/R600/cayman-loop-bug.ll b/test/CodeGen/R600/cayman-loop-bug.ll
index a87352895eb3..c7b8c4037316 100644
--- a/test/CodeGen/R600/cayman-loop-bug.ll
+++ b/test/CodeGen/R600/cayman-loop-bug.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
-; CHECK-LABEL: @main
+; CHECK-LABEL: {{^}}main:
; CHECK: LOOP_START_DX10
; CHECK: ALU_PUSH_BEFORE
; CHECK: LOOP_START_DX10
diff --git a/test/CodeGen/R600/cf-stack-bug.ll b/test/CodeGen/R600/cf-stack-bug.ll
index c3a4612e6ac9..02c87d76bb20 100644
--- a/test/CodeGen/R600/cf-stack-bug.ll
+++ b/test/CodeGen/R600/cf-stack-bug.ll
@@ -17,7 +17,7 @@
; BUG64-NOT: Applying bug work-around
; BUG32-NOT: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested3
+; FUNC-LABEL: {{^}}nested3:
define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
@@ -50,7 +50,7 @@ end:
; BUG64: Applying bug work-around
; BUG32-NOT: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested4
+; FUNC-LABEL: {{^}}nested4:
define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
@@ -91,7 +91,7 @@ end:
; BUG64: Applying bug work-around
; BUG32-NOT: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested7
+; FUNC-LABEL: {{^}}nested7:
define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
@@ -156,7 +156,7 @@ end:
; BUG64: Applying bug work-around
; BUG32: Applying bug work-around
; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested8
+; FUNC-LABEL: {{^}}nested8:
define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
entry:
%0 = icmp sgt i32 %cond, 0
diff --git a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
index f8b4a61a7db5..e16a397bb5a4 100644
--- a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
@@ -1,14 +1,15 @@
-; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC --check-prefix=FUNC %s
+; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--"
-; FUNC-LABEL: @test
+; OPT-LABEL: @test
; OPT: mul nsw i32
; OPT-NEXT: sext
-; SI-LLC: V_MUL_LO_I32
-; SI-LLC-NOT: V_MUL_HI
+; SI-LLC-LABEL: {{^}}test:
+; SI-LLC: s_mul_i32
+; SI-LLC-NOT: mul
define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
entry:
%0 = mul nsw i32 %a, 3
diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/R600/combine_vloads.ll
index f8ec712c1ec8..38420b25cba9 100644
--- a/test/CodeGen/R600/combine_vloads.ll
+++ b/test/CodeGen/R600/combine_vloads.ll
@@ -9,7 +9,7 @@
; 128-bit loads instead of many 8-bit
-; EG-LABEL: @combine_vloads:
+; EG-LABEL: {{^}}combine_vloads:
; EG: VTX_READ_128
; EG: VTX_READ_128
define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
diff --git a/test/CodeGen/R600/commute_modifiers.ll b/test/CodeGen/R600/commute_modifiers.ll
new file mode 100644
index 000000000000..6fddb6d595c9
--- /dev/null
+++ b/test/CodeGen/R600/commute_modifiers.ll
@@ -0,0 +1,181 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+
+; FUNC-LABEL: @commute_add_imm_fabs_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %x = load float addrspace(1)* %gep.0
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %z = fadd float 2.0, %x.fabs
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %x = load float addrspace(1)* %gep.0
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
+ %z = fmul float 4.0, %x.fneg.fabs
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @commute_mul_imm_fneg_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %x = load float addrspace(1)* %gep.0
+ %x.fneg = fsub float -0.000000e+00, %x
+ %z = fmul float 4.0, %x.fneg
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FIXME: Should use SGPR for literal.
+; FUNC-LABEL: @commute_add_lit_fabs_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %x = load float addrspace(1)* %gep.0
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %z = fadd float 1024.0, %x.fabs
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @commute_add_fabs_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %x = load float addrspace(1)* %gep.0
+ %y = load float addrspace(1)* %gep.1
+ %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %z = fadd float %x, %y.fabs
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @commute_mul_fneg_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %x = load float addrspace(1)* %gep.0
+ %y = load float addrspace(1)* %gep.1
+ %y.fneg = fsub float -0.000000e+00, %y
+ %z = fmul float %x, %y.fneg
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @commute_mul_fabs_fneg_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %x = load float addrspace(1)* %gep.0
+ %y = load float addrspace(1)* %gep.1
+ %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
+ %z = fmul float %x, %y.fabs.fneg
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; There's no reason to commute this.
+; FUNC-LABEL: @commute_mul_fabs_x_fabs_y_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %x = load float addrspace(1)* %gep.0
+ %y = load float addrspace(1)* %gep.1
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %z = fmul float %x.fabs, %y.fabs
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @commute_mul_fabs_x_fneg_fabs_y_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %x = load float addrspace(1)* %gep.0
+ %y = load float addrspace(1)* %gep.1
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
+ %z = fmul float %x.fabs, %y.fabs.fneg
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; Make sure we commute the multiply part for the constant in src0 even
+; though we have negate modifier on src2.
+
+; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
+; SI: buffer_store_dword [[RESULT]]
+define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %r2.fabs = call float @llvm.fabs.f32(float %r2)
+
+ %r3 = tail call float @llvm.fma.f32(float %r1, float 2.0, float %r2.fabs)
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll
index 99f0d99b3529..a5399a71324c 100644
--- a/test/CodeGen/R600/complex-folding.ll
+++ b/test/CodeGen/R600/complex-folding.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: @main
+; CHECK: {{^}}main:
; CHECK-NOT: MOV
define void @main(<4 x float> inreg %reg0) #0 {
entry:
diff --git a/test/CodeGen/R600/concat_vectors.ll b/test/CodeGen/R600/concat_vectors.ll
index 9abc5a627c1c..4c5b9c959516 100644
--- a/test/CodeGen/R600/concat_vectors.ll
+++ b/test/CodeGen/R600/concat_vectors.ll
@@ -1,247 +1,282 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @test_concat_v1i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i32:
+; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF
+; instructions that access scratch memory. Bit 23, which is the add_tid_enable
+; bit, is only set for scratch access, so we can check for the absence of this
+; value if we want to ensure scratch memory is not being used.
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
store <2 x i32> %concat, <2 x i32> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @test_concat_v2i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i32> %concat, <4 x i32> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @test_concat_v4i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i32> %concat, <8 x i32> addrspace(1)* %out, align 32
ret void
}
-; FUNC-LABEL: @test_concat_v8i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
%concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i32> %concat, <16 x i32> addrspace(1)* %out, align 64
ret void
}
-; FUNC-LABEL: @test_concat_v16i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
%concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i32> %concat, <32 x i32> addrspace(1)* %out, align 128
ret void
}
-; FUNC-LABEL: @test_concat_v1f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
%concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> <i32 0, i32 1>
store <2 x float> %concat, <2 x float> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @test_concat_v2f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x float> %concat, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @test_concat_v4f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x float> %concat, <8 x float> addrspace(1)* %out, align 32
ret void
}
-; FUNC-LABEL: @test_concat_v8f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x float> %concat, <16 x float> addrspace(1)* %out, align 64
ret void
}
-; FUNC-LABEL: @test_concat_v16f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x float> %concat, <32 x float> addrspace(1)* %out, align 128
ret void
}
-; FUNC-LABEL: @test_concat_v1i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @test_concat_v2i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
ret void
}
-; FUNC-LABEL: @test_concat_v4i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
ret void
}
-; FUNC-LABEL: @test_concat_v8i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
ret void
}
-; FUNC-LABEL: @test_concat_v16i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
ret void
}
-; FUNC-LABEL: @test_concat_v1f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
%concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @test_concat_v2f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
%concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
ret void
}
-; FUNC-LABEL: @test_concat_v4f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
%concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
ret void
}
-; FUNC-LABEL: @test_concat_v8f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
%concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
ret void
}
-; FUNC-LABEL: @test_concat_v16f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
%concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
ret void
}
-; FUNC-LABEL: @test_concat_v1i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
%concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> <i32 0, i32 1>
store <2 x i1> %concat, <2 x i1> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @test_concat_v2i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
%concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i1> %concat, <4 x i1> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @test_concat_v4i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
%concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i1> %concat, <8 x i1> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @test_concat_v8i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
%concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i1> %concat, <16 x i1> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @test_concat_v16i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
%concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i1> %concat, <32 x i1> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @test_concat_v32i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v32i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
%concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
store <64 x i1> %concat, <64 x i1> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @test_concat_v1i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
%concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> <i32 0, i32 1>
store <2 x i16> %concat, <2 x i16> addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @test_concat_v2i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %concat, <4 x i16> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @test_concat_v4i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
%concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i16> %concat, <8 x i16> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @test_concat_v8i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
%concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
store <16 x i16> %concat, <16 x i16> addrspace(1)* %out, align 32
ret void
}
-; FUNC-LABEL: @test_concat_v16i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
%concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64
diff --git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/R600/copy-illegal-type.ll
index f7c2321ae8fe..2dff24c432b1 100644
--- a/test/CodeGen/R600/copy-illegal-type.ll
+++ b/test/CodeGen/R600/copy-illegal-type.ll
@@ -1,20 +1,20 @@
-; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @test_copy_v4i8
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @test_copy_v4i8_x2
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -22,12 +22,12 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
ret void
}
-; FUNC-LABEL: @test_copy_v4i8_x3
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -36,13 +36,13 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
ret void
}
-; FUNC-LABEL: @test_copy_v4i8_x4
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -52,34 +52,34 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
ret void
}
-; FUNC-LABEL: @test_copy_v4i8_extra_use
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
+; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
; After scalarizing v4i8 loads is fixed.
-; XSI: BUFFER_LOAD_DWORD
+; XSI: buffer_load_dword
; XSI: V_BFE
; XSI: V_ADD
; XSI: V_ADD
; XSI: V_ADD
-; XSI: BUFFER_STORE_DWORD
-; XSI: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
+; XSI: buffer_store_dword
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -88,36 +88,36 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
ret void
}
-; FUNC-LABEL: @test_copy_v4i8_x2_extra_use
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
+; FUNC-LABEL: {{^}}test_copy_v4i8_x2_extra_use:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
-; XSI: BUFFER_LOAD_DWORD
+; XSI: buffer_load_dword
; XSI: BFE
-; XSI: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
; XSI: V_ADD
-; XSI: BUFFER_STORE_DWORD
-; XSI-NEXT: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
+; XSI-NEXT: buffer_store_dword
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -127,38 +127,38 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
ret void
}
-; FUNC-LABEL: @test_copy_v3i8
-; SI-NOT: BFE
-; SI-NOT: BFI
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v3i8:
+; SI-NOT: bfe
+; SI-NOT: bfi
+; SI: s_endpgm
define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
%val = load <3 x i8> addrspace(1)* %in, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @test_copy_v4i8_volatile_load
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: s_endpgm
define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load volatile <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @test_copy_v4i8_volatile_store
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: s_endpgm
define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
%val = load <4 x i8> addrspace(1)* %in, align 4
store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll
new file mode 100644
index 000000000000..4a4143567102
--- /dev/null
+++ b/test/CodeGen/R600/copy-to-reg.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s
+
+; Test that CopyToReg instructions don't have non-register operands prior
+; to being emitted.
+
+; Make sure this doesn't crash
+; CHECK-LABEL: {{^}}copy_to_reg_frameindex:
+define void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+entry:
+ %alloca = alloca [16 x i32]
+ br label %loop
+
+loop:
+ %inc = phi i32 [0, %entry], [%inc.i, %loop]
+ %ptr = getelementptr [16 x i32]* %alloca, i32 0, i32 %inc
+ store i32 %inc, i32* %ptr
+ %inc.i = add i32 %inc, 1
+ %cnd = icmp uge i32 %inc.i, 16
+ br i1 %cnd, label %done, label %loop
+
+done:
+ %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 0
+ %tmp1 = load i32* %tmp0
+ store i32 %tmp1, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll
index 1340ef98c605..090610d4aac2 100644
--- a/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -1,16 +1,16 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
-; FUNC-LABEL: @s_ctlz_zero_undef_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_flbit_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -19,11 +19,11 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
ret void
}
-; FUNC-LABEL: @v_ctlz_zero_undef_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -33,12 +33,12 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @v_ctlz_zero_undef_v2i32:
-; SI: BUFFER_LOAD_DWORDX2
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
+; SI: buffer_load_dwordx2
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
@@ -49,14 +49,14 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
ret void
}
-; FUNC-LABEL: @v_ctlz_zero_undef_v4i32:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
+; SI: buffer_load_dwordx4
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
index 22a3022145f1..a47bc876cb96 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
@@ -7,12 +7,12 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
-; FUNC-LABEL: @s_ctpop_i32:
-; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
-; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_i32:
+; SI: s_load_dword [[SVAL:s[0-9]+]],
+; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -22,12 +22,12 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
}
; XXX - Why 0 in register?
-; FUNC-LABEL: @v_ctpop_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -37,15 +37,14 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
ret void
}
-; FUNC-LABEL: @v_ctpop_add_chain_i32
-; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
-; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
-; SI-NOT: ADD
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
+; SI: buffer_load_dword [[VAL0:v[0-9]+]],
+; SI: buffer_load_dword [[VAL1:v[0-9]+]],
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -59,10 +58,24 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @v_ctpop_v2i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
+; SI: buffer_load_dword [[VAL0:v[0-9]+]],
+; SI-NEXT: s_waitcnt
+; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; SI-NEXT: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
+ %val0 = load i32 addrspace(1)* %in0, align 4
+ %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+ %add = add i32 %ctpop0, %sval
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_v2i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -73,12 +86,12 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
ret void
}
-; FUNC-LABEL: @v_ctpop_v4i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v4i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -91,16 +104,16 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
ret void
}
-; FUNC-LABEL: @v_ctpop_v8i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v8i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -117,24 +130,24 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
ret void
}
-; FUNC-LABEL: @v_ctpop_v16i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v16i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -159,11 +172,11 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
ret void
}
-; FUNC-LABEL: @v_ctpop_i32_add_inline_constant:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -174,11 +187,11 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
ret void
}
-; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -189,12 +202,12 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
ret void
}
-; FUNC-LABEL: @v_ctpop_i32_add_literal:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -203,12 +216,12 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
ret void
}
-; FUNC-LABEL: @v_ctpop_i32_add_var:
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -219,12 +232,12 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
ret void
}
-; FUNC-LABEL: @v_ctpop_i32_add_var_inv:
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -235,12 +248,12 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
ret void
}
-; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0
-; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
+; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
@@ -256,29 +269,29 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
; FIXME: We currently disallow SALU instructions in all branches,
; but there are some cases when the should be allowed.
-; FUNC-LABEL: @ctpop_i32_in_br
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-; EG: BCNT_INT
-define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
+; FUNC-LABEL: {{^}}ctpop_i32_in_br:
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
+; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+; EG: BCNT_INT
+define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:
- %0 = icmp eq i32 %cond, 0
- br i1 %0, label %if, label %else
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %if, label %else
if:
- %1 = load i32 addrspace(1)* %in
- %2 = call i32 @llvm.ctpop.i32(i32 %1)
+ %tmp2 = call i32 @llvm.ctpop.i32(i32 %ctpop_arg)
br label %endif
else:
- %3 = getelementptr i32 addrspace(1)* %in, i32 1
- %4 = load i32 addrspace(1)* %3
+ %tmp3 = getelementptr i32 addrspace(1)* %in, i32 1
+ %tmp4 = load i32 addrspace(1)* %tmp3
br label %endif
endif:
- %5 = phi i32 [%2, %if], [%4, %else]
- store i32 %5, i32 addrspace(1)* %out
+ %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else]
+ store i32 %tmp5, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll
index b36ecc68d895..8dfe571d3477 100644
--- a/test/CodeGen/R600/ctpop64.ll
+++ b/test/CodeGen/R600/ctpop64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
@@ -6,12 +6,12 @@ declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
-; FUNC-LABEL: @s_ctpop_i64:
-; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]],
-; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_i64:
+; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
@@ -19,13 +19,13 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
ret void
}
-; FUNC-LABEL: @v_ctpop_i64:
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
-; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i64:
+; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@@ -34,10 +34,10 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
ret void
}
-; FUNC-LABEL: @s_ctpop_v2i64:
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_v2i64:
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_endpgm
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@@ -45,12 +45,12 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
ret void
}
-; FUNC-LABEL: @s_ctpop_v4i64:
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_v4i64:
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_endpgm
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@@ -58,12 +58,12 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
ret void
}
-; FUNC-LABEL: @v_ctpop_v2i64:
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v2i64:
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@@ -72,16 +72,16 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
ret void
}
-; FUNC-LABEL: @v_ctpop_v4i64:
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v4i64:
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%val = load <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@@ -93,30 +93,29 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
; FIXME: We currently disallow SALU instructions in all branches,
; but there are some cases when the should be allowed.
-; FUNC-LABEL: @ctpop_i64_in_br
-; SI: V_BCNT_U32_B32_e64 [[BCNT_LO:v[0-9]+]], v{{[0-9]+}}, 0
-; SI: V_BCNT_U32_B32_e32 v[[BCNT:[0-9]+]], v{{[0-9]+}}, [[BCNT_LO]]
-; SI: V_MOV_B32_e32 v[[ZERO:[0-9]+]], 0
-; SI: BUFFER_STORE_DWORDX2 v[
-; SI: [[BCNT]]:[[ZERO]]]
-; SI: S_ENDPGM
-define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i32 %cond) {
+; FUNC-LABEL: {{^}}ctpop_i64_in_br:
+; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
+; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
+; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
+; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
+; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
+; SI: s_endpgm
+define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
- %0 = icmp eq i32 %cond, 0
- br i1 %0, label %if, label %else
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %if, label %else
if:
- %1 = load i64 addrspace(1)* %in
- %2 = call i64 @llvm.ctpop.i64(i64 %1)
+ %tmp2 = call i64 @llvm.ctpop.i64(i64 %ctpop_arg)
br label %endif
else:
- %3 = getelementptr i64 addrspace(1)* %in, i32 1
- %4 = load i64 addrspace(1)* %3
+ %tmp3 = getelementptr i64 addrspace(1)* %in, i32 1
+ %tmp4 = load i64 addrspace(1)* %tmp3
br label %endif
endif:
- %5 = phi i64 [%2, %if], [%4, %else]
- store i64 %5, i64 addrspace(1)* %out
+ %tmp5 = phi i64 [%tmp2, %if], [%tmp4, %else]
+ store i64 %tmp5, i64 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/cttz-ctlz.ll b/test/CodeGen/R600/cttz-ctlz.ll
new file mode 100644
index 000000000000..6be06d243eaa
--- /dev/null
+++ b/test/CodeGen/R600/cttz-ctlz.ll
@@ -0,0 +1,224 @@
+; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
+
+
+define i64 @test1(i64 %A) {
+; ALL-LABEL: @test1(
+; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; SI-NEXT: ret i64 [[CTLZ]]
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+ ret i64 %cond
+}
+
+
+define i32 @test2(i32 %A) {
+; ALL-LABEL: @test2(
+; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i32 @llvm.ctlz.i32(i32 %A, i1 false)
+; SI-NEXT: ret i32 [[CTLZ]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+
+define signext i16 @test3(i16 signext %A) {
+; ALL-LABEL: @test3(
+; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i16 @llvm.ctlz.i16(i16 %A, i1 false)
+; SI-NEXT: ret i16 [[CTLZ]]
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+ ret i16 %cond
+}
+
+
+define i64 @test1b(i64 %A) {
+; ALL-LABEL: @test1b(
+; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; SI-NEXT: ret i64 [[CTTZ]]
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+ ret i64 %cond
+}
+
+
+define i32 @test2b(i32 %A) {
+; ALL-LABEL: @test2b(
+; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %A, i1 false)
+; SI-NEXT: ret i32 [[CTTZ]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+
+define signext i16 @test3b(i16 signext %A) {
+; ALL-LABEL: @test3b(
+; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i16 @llvm.cttz.i16(i16 %A, i1 false)
+; SI-NEXT: ret i16 [[CTTZ]]
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+ ret i16 %cond
+}
+
+
+define i64 @test1c(i64 %A) {
+; ALL-LABEL: @test1c(
+; ALL: icmp eq i64 %A, 0
+; ALL: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+ ret i64 %cond
+}
+
+define i32 @test2c(i32 %A) {
+; ALL-LABEL: @test2c(
+; ALL: icmp eq i32 %A, 0
+; ALL: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+ ret i32 %cond
+}
+
+
+define signext i16 @test3c(i16 signext %A) {
+; ALL-LABEL: @test3c(
+; ALL: icmp eq i16 %A, 0
+; ALL: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+ ret i16 %cond
+}
+
+
+define i64 @test1d(i64 %A) {
+; ALL-LABEL: @test1d(
+; ALL: icmp eq i64 %A, 0
+; ALL: call i64 @llvm.cttz.i64(i64 %A, i1 true)
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+ ret i64 %cond
+}
+
+
+define i32 @test2d(i32 %A) {
+; ALL-LABEL: @test2d(
+; ALL: icmp eq i32 %A, 0
+; ALL: call i32 @llvm.cttz.i32(i32 %A, i1 true)
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+ ret i32 %cond
+}
+
+
+define signext i16 @test3d(i16 signext %A) {
+; ALL-LABEL: @test3d(
+; ALL: icmp eq i16 %A, 0
+; ALL: call i16 @llvm.cttz.i16(i16 %A, i1 true)
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+ ret i16 %cond
+}
+
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll
index 9c4a3558d094..ab59360694bf 100644
--- a/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/test/CodeGen/R600/cttz_zero_undef.ll
@@ -1,16 +1,16 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
-; FUNC-LABEL: @s_cttz_zero_undef_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_ff1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -19,11 +19,11 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
ret void
}
-; FUNC-LABEL: @v_cttz_zero_undef_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbl_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -33,12 +33,12 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @v_cttz_zero_undef_v2i32:
-; SI: BUFFER_LOAD_DWORDX2
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_v2i32:
+; SI: buffer_load_dwordx2
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
@@ -49,14 +49,14 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
ret void
}
-; FUNC-LABEL: @v_cttz_zero_undef_v4i32:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_v4i32:
+; SI: buffer_load_dwordx4
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
index 06a601065c3e..e26ee12f6f6d 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: @load_i8_to_f32:
-; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORD [[CONV]],
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}load_i8_to_f32:
+; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dword [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%load = load i8 addrspace(1)* %in, align 1
%cvt = uitofp i8 %load to float
@@ -13,76 +13,96 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
ret void
}
-; SI-LABEL: @load_v2i8_to_v2f32:
-; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-NOT: AND
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI-LABEL: {{^}}load_v2i8_to_v2f32:
+; SI: buffer_load_ushort [[LOADREG:v[0-9]+]],
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-NOT: and
+; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in, align 1
+ %load = load <2 x i8> addrspace(1)* %in, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @load_v3i8_to_v3f32:
-; SI-NOT: BFE
-; SI-NOT: V_CVT_F32_UBYTE3_e32
-; SI-DAG: V_CVT_F32_UBYTE2_e32
-; SI-DAG: V_CVT_F32_UBYTE1_e32
-; SI-DAG: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI-LABEL: {{^}}load_v3i8_to_v3f32:
+; SI-NOT: bfe
+; SI-NOT: v_cvt_f32_ubyte3_e32
+; SI-DAG: v_cvt_f32_ubyte2_e32
+; SI-DAG: v_cvt_f32_ubyte1_e32
+; SI-DAG: v_cvt_f32_ubyte0_e32
+; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <3 x i8> addrspace(1)* %in, align 1
+ %load = load <3 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @load_v4i8_to_v4f32:
-; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
-; BUFFER_LOAD_DWORD requires dword alignment.
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: V_OR_B32_e32 [[LOADREG:v[0-9]+]]
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI-LABEL: {{^}}load_v4i8_to_v4f32:
+; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in, align 4
+ %cvt = uitofp <4 x i8> %load to <4 x float>
+ store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; This should not be adding instructions to shift into the correct
+; position in the word for the component.
+
+; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
+; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
+; SI-NOT: v_lshlrev_b32
+; SI-NOT: v_or_b32
+
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]]
+
+; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; XXX - This should really still be able to use the V_CVT_F32_UBYTE0
+; XXX - This should really still be able to use the v_cvt_f32_ubyte0
; for each component, but computeKnownBits doesn't handle vectors very
; well.
-; SI-LABEL: @load_v4i8_to_v4f32_2_uses:
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
+; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
; XXX - replace with this when v4i8 loads aren't scalarized anymore.
-; XSI: BUFFER_LOAD_DWORD
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
+; XSI: buffer_load_dword
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; SI: s_endpgm
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
@@ -93,8 +113,8 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <
}
; Make sure this doesn't crash.
-; SI-LABEL: @load_v7i8_to_v7f32:
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}load_v7i8_to_v7f32:
+; SI: s_endpgm
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <7 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
@@ -102,28 +122,28 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
ret void
}
-; SI-LABEL: @load_v8i8_to_v8f32:
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI-LABEL: {{^}}load_v8i8_to_v8f32:
+; SI: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <8 x i8> %load to <8 x float>
@@ -131,11 +151,11 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
ret void
}
-; SI-LABEL: @i8_zext_inreg_i32_to_f32:
-; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
-; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
-; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
-; SI: BUFFER_STORE_DWORD [[CONV]],
+; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
+; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
+; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
+; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 2
@@ -145,7 +165,7 @@ define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addr
ret void
}
-; SI-LABEL: @i8_zext_inreg_hi1_to_f32:
+; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%inreg = and i32 %load, 65280
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index 6607c1218b56..1e47bfa0c779 100644
--- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -7,7 +7,7 @@
; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes.
-; CHECK: @sint
+; CHECK: {{^}}sint:
; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -21,7 +21,7 @@ entry:
ret void
}
-;CHECK: @uint
+;CHECK: {{^}}uint:
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll
index b24a7a246fda..6b6d49996eb1 100644
--- a/test/CodeGen/R600/default-fp-mode.ll
+++ b/test/CodeGen/R600/default-fp-mode.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; FUNC-LABEL: @test_kernel
+; FUNC-LABEL: {{^}}test_kernel:
; DEFAULT: FloatMode: 192
; DEFAULT: IeeeMode: 0
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index 012c17b8fe4b..858e4b98f3ab 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -4,7 +4,7 @@
; result. This tests that there are no instructions between the PRED_SET*
; and the PREDICATE_BREAK in this loop.
-; CHECK: @loop_ge
+; CHECK: {{^}}loop_ge:
; CHECK: LOOP_START_DX10
; CHECK: ALU_PUSH_BEFORE
; CHECK-NEXT: JUMP
diff --git a/test/CodeGen/R600/dot4-folding.ll b/test/CodeGen/R600/dot4-folding.ll
index 3e8330f9b3ed..dca6a59c6e6a 100644
--- a/test/CodeGen/R600/dot4-folding.ll
+++ b/test/CodeGen/R600/dot4-folding.ll
@@ -2,7 +2,7 @@
; Exactly one constant vector can be folded into dot4, which means exactly
; 4 MOV instructions
-; CHECK: @main
+; CHECK: {{^}}main:
; CHECK: MOV
; CHECK: MOV
; CHECK: MOV
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
new file mode 100644
index 000000000000..41afd503ef88
--- /dev/null
+++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -0,0 +1,69 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare void @llvm.AMDGPU.barrier.local() #1
+
+; Function Attrs: nounwind
+; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
+; CHECK: BB0_1:
+; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
+
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
+; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
+; CHECK: s_endpgm
+define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
+entry:
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #0
+ %mul = shl nsw i32 %x.i, 1
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
+ %offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
+ %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ tail call void @llvm.AMDGPU.barrier.local() #1
+ %arrayidx = getelementptr inbounds float addrspace(3)* %lptr, i32 %offset.02
+ %tmp = load float addrspace(3)* %arrayidx, align 4
+ %add1 = add nsw i32 %offset.02, 1
+ %arrayidx2 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add1
+ %tmp1 = load float addrspace(3)* %arrayidx2, align 4
+ %add3 = add nsw i32 %offset.02, 32
+ %arrayidx4 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add3
+ %tmp2 = load float addrspace(3)* %arrayidx4, align 4
+ %add5 = add nsw i32 %offset.02, 33
+ %arrayidx6 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add5
+ %tmp3 = load float addrspace(3)* %arrayidx6, align 4
+ %add7 = add nsw i32 %offset.02, 64
+ %arrayidx8 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add7
+ %tmp4 = load float addrspace(3)* %arrayidx8, align 4
+ %add9 = fadd float %tmp, %tmp1
+ %add10 = fadd float %add9, %tmp2
+ %add11 = fadd float %add10, %tmp3
+ %add12 = fadd float %add11, %tmp4
+ %add13 = fadd float %sum.03, %add12
+ %inc = add nsw i32 %k.01, 1
+ %add14 = add nsw i32 %offset.02, 97
+ %exitcond = icmp eq i32 %inc, 8
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %tmp5 = sext i32 %x.i to i64
+ %arrayidx15 = getelementptr inbounds float addrspace(1)* %out, i64 %tmp5
+ store float %add13, float addrspace(1)* %arrayidx15, align 4
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { noduplicate nounwind }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
new file mode 100644
index 000000000000..c06b0b1392e2
--- /dev/null
+++ b/test/CodeGen/R600/ds_read2.ll
@@ -0,0 +1,515 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+; FIXME: We don't get cases where the address was an SGPR because we
+; get a copy to the address register for each one.
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+ @lds.f64 = addrspace(3) global [512 x double] undef, align 8
+
+; SI-LABEL: @simple_read2_f32
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2_f32(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f32_max_offset
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 255
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f32_too_far
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
+; SI: s_endpgm
+define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 257
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f32_x2
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 0
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+
+ %idx.1 = add nsw i32 %tid.x, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum.0 = fadd float %val0, %val1
+
+ %idx.2 = add nsw i32 %tid.x, 11
+ %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float addrspace(3)* %arrayidx2, align 4
+
+ %idx.3 = add nsw i32 %tid.x, 27
+ %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %sum.1 = fadd float %val2, %val3
+
+ %sum = fadd float %sum.0, %sum.1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; Make sure there is an instruction between the two sets of reads.
+; SI-LABEL: @simple_read2_f32_x2_barrier
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: s_barrier
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 0
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+
+ %idx.1 = add nsw i32 %tid.x, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum.0 = fadd float %val0, %val1
+
+ call void @llvm.AMDGPU.barrier.local() #2
+
+ %idx.2 = add nsw i32 %tid.x, 11
+ %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float addrspace(3)* %arrayidx2, align 4
+
+ %idx.3 = add nsw i32 %tid.x, 27
+ %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %sum.1 = fadd float %val2, %val3
+
+ %sum = fadd float %sum.0, %sum.1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; For some reason adding something to the base address for the first
+; element results in only folding the inner pair.
+
+; SI-LABEL: @simple_read2_f32_x2_nonzero_base
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+
+ %idx.1 = add nsw i32 %tid.x, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum.0 = fadd float %val0, %val1
+
+ %idx.2 = add nsw i32 %tid.x, 11
+ %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float addrspace(3)* %arrayidx2, align 4
+
+ %idx.3 = add nsw i32 %tid.x, 27
+ %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %sum.1 = fadd float %val2, %val3
+
+ %sum = fadd float %sum.0, %sum.1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; Be careful of vectors of pointers. We don't know if the 2 pointers
+; in the vectors are really the same base, so this is not safe to
+; merge.
+; Base pointers come from different subregister of same super
+; register. We can't safely merge this.
+
+; SI-LABEL: @read2_ptr_is_subreg_arg_f32
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
+define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+ %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
+ %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+ %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+ %val0 = load float addrspace(3)* %gep.0, align 4
+ %val1 = load float addrspace(3)* %gep.1, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; Apply a constant scalar offset after the pointer vector extract. We
+; are rejecting merges that have the same, constant 0 offset, so make
+; sure we are really rejecting it because of the different
+; subregisters.
+
+; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
+define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+ %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
+ %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+ %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+
+ ; Apply an additional offset after the vector that will be more obviously folded.
+ %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+
+ %val0 = load float addrspace(3)* %gep.0, align 4
+ %val1 = load float addrspace(3)* %gep.1.offset, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; We should be able to merge in this case, but probably not worth the effort.
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
+define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
+ %ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1
+ %x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+ %x.i.v.1 = insertelement <2 x i32> %x.i.v.0, i32 %x.i, i32 1
+ %idx = add <2 x i32> %x.i.v.1, <i32 0, i32 8>
+ %gep = getelementptr inbounds <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
+ %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+ %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+ %val0 = load float addrspace(3)* %gep.0, align 4
+ %val1 = load float addrspace(3)* %gep.1, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f32_volatile_0
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load volatile float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f32_volatile_1
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load volatile float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; Can't fold since not correctly aligned.
+; XXX: This isn't really testing anything useful now. I think CI
+; allows unaligned LDS accesses, which would be a problem here.
+; SI-LABEL: @unaligned_read2_f32
+; SI-NOT: ds_read2_b32
+; SI: s_endpgm
+define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 1
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 1
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @misaligned_2_simple_read2_f32
+; SI-NOT: ds_read2_b32
+; SI: s_endpgm
+define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 2
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 2
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f64
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
+; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2_f64(double addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f64_max_offset
+; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: s_endpgm
+define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 255
+ %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; SI-LABEL: @simple_read2_f64_too_far
+; SI-NOT ds_read2_b64
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
+; SI: s_endpgm
+define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 257
+ %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; Alignment only 4
+; SI-LABEL: @misaligned_read2_f64
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
+; SI: s_endpgm
+define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 7
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double addrspace(3)* %arrayidx1, align 4
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+@foo = addrspace(3) global [4 x i32] undef, align 4
+
+; SI-LABEL: @load_constant_adjacent_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
+ %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+ %sum = add i32 %val0, %val1
+ store i32 %sum, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @load_constant_disjoint_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
+define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
+ %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+ %sum = add i32 %val0, %val1
+ store i32 %sum, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+@bar = addrspace(3) global [4 x i64] undef, align 4
+
+; SI-LABEL: @load_misaligned64_constant_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
+define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
+ %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+ %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+ %sum = add i64 %val0, %val1
+ store i64 %sum, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+@bar.large = addrspace(3) global [4096 x i64] undef, align 4
+
+; SI-LABEL: @load_misaligned64_constant_large_offsets
+; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI: s_endpgm
+define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
+ %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+ %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+ %sum = add i64 %val0, %val1
+ store i64 %sum, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
+@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
+
+define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
+ %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
+ %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+ %tmp16 = load float addrspace(3)* %arrayidx44, align 4
+ %add47 = add nsw i32 %x.i, 1
+ %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+ %tmp17 = load float addrspace(3)* %arrayidx48, align 4
+ %add51 = add nsw i32 %x.i, 16
+ %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+ %tmp18 = load float addrspace(3)* %arrayidx52, align 4
+ %add55 = add nsw i32 %x.i, 17
+ %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+ %tmp19 = load float addrspace(3)* %arrayidx56, align 4
+ %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+ %tmp20 = load float addrspace(3)* %arrayidx60, align 4
+ %add63 = add nsw i32 %y.i, 1
+ %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+ %tmp21 = load float addrspace(3)* %arrayidx64, align 4
+ %add67 = add nsw i32 %y.i, 32
+ %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+ %tmp22 = load float addrspace(3)* %arrayidx68, align 4
+ %add71 = add nsw i32 %y.i, 33
+ %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+ %tmp23 = load float addrspace(3)* %arrayidx72, align 4
+ %add75 = add nsw i32 %y.i, 64
+ %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+ %tmp24 = load float addrspace(3)* %arrayidx76, align 4
+ %add79 = add nsw i32 %y.i, 65
+ %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+ %tmp25 = load float addrspace(3)* %arrayidx80, align 4
+ %sum.0 = fadd float %tmp16, %tmp17
+ %sum.1 = fadd float %sum.0, %tmp18
+ %sum.2 = fadd float %sum.1, %tmp19
+ %sum.3 = fadd float %sum.2, %tmp20
+ %sum.4 = fadd float %sum.3, %tmp21
+ %sum.5 = fadd float %sum.4, %tmp22
+ %sum.6 = fadd float %sum.5, %tmp23
+ %sum.7 = fadd float %sum.6, %tmp24
+ %sum.8 = fadd float %sum.7, %tmp25
+ store float %sum.8, float addrspace(1)* %C, align 4
+ ret void
+}
+
+define void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
+ %load = load <2 x i32> addrspace(3)* %in, align 4
+ store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
+ %load = load i64 addrspace(3)* %in, align 4
+ store i64 %load, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/R600/ds_read2_offset_order.ll
new file mode 100644
index 000000000000..bdbe22ff2348
--- /dev/null
+++ b/test/CodeGen/R600/ds_read2_offset_order.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+; XFAIL: *
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+
+; SI-LABEL: {{^}}offset_order:
+
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:56
+; SI: ds_read2st64_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:0 offset1:4
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:2 offset1:3
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:11 offset1:1
+
+define void @offset_order(float addrspace(1)* %out) {
+entry:
+ %ptr0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 0
+ %val0 = load float addrspace(3)* %ptr0
+
+ %ptr1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 256
+ %val1 = load float addrspace(3)* %ptr1
+ %add1 = fadd float %val0, %val1
+
+ %ptr2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 3
+ %val2 = load float addrspace(3)* %ptr2
+ %add2 = fadd float %add1, %val2
+
+ %ptr3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 2
+ %val3 = load float addrspace(3)* %ptr3
+ %add3 = fadd float %add2, %val3
+
+ %ptr4 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 12
+ %val4 = load float addrspace(3)* %ptr4
+ %add4 = fadd float %add3, %val4
+
+ %ptr5 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 14
+ %val5 = load float addrspace(3)* %ptr5
+ %add5 = fadd float %add4, %val5
+
+ %ptr6 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 11
+ %val6 = load float addrspace(3)* %ptr6
+ %add6 = fadd float %add5, %val6
+ store float %add6, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
new file mode 100644
index 000000000000..24834af20404
--- /dev/null
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -0,0 +1,272 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+@lds.f64 = addrspace(3) global [512 x double] undef, align 8
+
+
+; SI-LABEL: @simple_read2st64_f32_0_1
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 64
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2st64_f32_1_2
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 64
+ %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x.1 = add nsw i32 %x.i, 128
+ %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2st64_f32_max_offset
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 64
+ %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x.1 = add nsw i32 %x.i, 16320
+ %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2st64_f32_over_max_offset
+; SI-NOT: ds_read2st64_b32
+; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 64
+ %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x.1 = add nsw i32 %x.i, 16384
+ %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @odd_invalid_read2st64_f32_0
+; SI-NOT: ds_read2st64_b32
+; SI: s_endpgm
+define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 63
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @odd_invalid_read2st64_f32_1
+; SI-NOT: ds_read2st64_b32
+; SI: s_endpgm
+define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 64
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %add.x.1 = add nsw i32 %x.i, 127
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ store float %sum, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_read2st64_f64_0_1
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 64
+ %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; SI-LABEL: @simple_read2st64_f64_1_2
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 64
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x.1 = add nsw i32 %x.i, 128
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; Alignment only
+
+; SI-LABEL: @misaligned_read2st64_f64
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
+; SI: s_endpgm
+define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 64
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double addrspace(3)* %arrayidx1, align 4
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
+; SI-LABEL: @simple_read2st64_f64_max_offset
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 256
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x.1 = add nsw i32 %x.i, 8128
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; SI-LABEL: @simple_read2st64_f64_over_max_offset
+; SI-NOT: ds_read2st64_b64
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 64
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x.1 = add nsw i32 %x.i, 8192
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; SI-LABEL: @invalid_read2st64_f64_odd_offset
+; SI-NOT: ds_read2st64_b64
+; SI: s_endpgm
+define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %add.x.0 = add nsw i32 %x.i, 64
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x.1 = add nsw i32 %x.i, 8129
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; The stride of 8 elements is 8 * 8 bytes. We need to make sure the
+; stride in elements, not bytes, is a multiple of 64.
+
+; SI-LABEL: @byte_size_only_divisible_64_read2_f64
+; SI-NOT: ds_read2st_b64
+; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: s_endpgm
+define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %sum = fadd double %val0, %val1
+ %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ store double %sum, double addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
new file mode 100644
index 000000000000..27273e7c674d
--- /dev/null
+++ b/test/CodeGen/R600/ds_write2.ll
@@ -0,0 +1,425 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+@lds.f64 = addrspace(3) global [512 x double] undef, align 8
+
+
+; SI-LABEL: @simple_write2_one_val_f32
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
+ %val = load float addrspace(1)* %in.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_volatile_0
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float addrspace(1)* %in0.gep, align 4
+ %val1 = load float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_volatile_1
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float addrspace(1)* %in0.gep, align 4
+ %val1 = load float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; 2 data subregisters from different super registers.
+; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
+; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
+; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr <2 x float> addrspace(1)* %in.gep.0, i32 1
+ %val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
+ %val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
+ %val0.0 = extractelement <2 x float> %val0, i32 0
+ %val1.1 = extractelement <2 x float> %val1, i32 1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val0.0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val1.1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_subreg2_f32
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
+ %val = load <2 x float> addrspace(1)* %in.gep, align 8
+ %val0 = extractelement <2 x float> %val, i32 0
+ %val1 = extractelement <2 x float> %val, i32 1
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_subreg4_f32
+; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
+ %val = load <4 x float> addrspace(1)* %in.gep, align 16
+ %val0 = extractelement <4 x float> %val, i32 0
+ %val1 = extractelement <4 x float> %val, i32 3
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_max_offset_f32
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 255
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_too_far_f32
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
+; SI: s_endpgm
+define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float addrspace(1)* %in0.gep, align 4
+ %val1 = load float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 257
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_x2
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
+; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
+ %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
+ %val0 = load float addrspace(1)* %in0.gep, align 4
+ %val1 = load float addrspace(1)* %in1.gep, align 4
+
+ %idx.0 = add nsw i32 %tid.x, 0
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+
+ %idx.1 = add nsw i32 %tid.x, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+
+ %idx.2 = add nsw i32 %tid.x, 11
+ %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ store float %val0, float addrspace(3)* %arrayidx2, align 4
+
+ %idx.3 = add nsw i32 %tid.x, 27
+ %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ store float %val1, float addrspace(3)* %arrayidx3, align 4
+
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
+; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
+ %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
+ %val0 = load float addrspace(1)* %in0.gep, align 4
+ %val1 = load float addrspace(1)* %in1.gep, align 4
+
+ %idx.0 = add nsw i32 %tid.x, 3
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+
+ %idx.1 = add nsw i32 %tid.x, 8
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+
+ %idx.2 = add nsw i32 %tid.x, 11
+ %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ store float %val0, float addrspace(3)* %arrayidx2, align 4
+
+ %idx.3 = add nsw i32 %tid.x, 27
+ %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ store float %val1, float addrspace(3)* %arrayidx3, align 4
+
+ ret void
+}
+
+; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: s_endpgm
+define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float addrspace(1)* %in0.gep, align 4
+ %val1 = load float addrspace(1)* %in1.gep, align 4
+
+ %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+ %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
+ %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+ %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+
+ ; Apply an additional offset after the vector that will be more obviously folded.
+ %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+ store float %val0, float addrspace(3)* %gep.0, align 4
+
+ %add.x = add nsw i32 %x.i, 8
+ store float %val1, float addrspace(3)* %gep.1.offset, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_one_val_f64
+; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
+ %val = load double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ store double %val, double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ store double %val, double addrspace(3)* %arrayidx1, align 8
+ ret void
+}
+
+; SI-LABEL: @misaligned_simple_write2_one_val_f64
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
+; SI: s_endpgm
+define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
+ %val = load double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ store double %val, double addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 7
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ store double %val, double addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f64
+; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
+ %val0 = load double addrspace(1)* %in.gep.0, align 8
+ %val1 = load double addrspace(1)* %in.gep.1, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ store double %val0, double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ store double %val1, double addrspace(3)* %arrayidx1, align 8
+ ret void
+}
+
+@foo = addrspace(3) global [4 x i32] undef, align 4
+
+; SI-LABEL: @store_constant_adjacent_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+define void @store_constant_adjacent_offsets() {
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+ ret void
+}
+
+; SI-LABEL: @store_constant_disjoint_offsets
+; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
+; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
+define void @store_constant_disjoint_offsets() {
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+ ret void
+}
+
+@bar = addrspace(3) global [4 x i64] undef, align 4
+
+; SI-LABEL: @store_misaligned64_constant_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+define void @store_misaligned64_constant_offsets() {
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+ ret void
+}
+
+@bar.large = addrspace(3) global [4096 x i64] undef, align 4
+
+; SI-LABEL: @store_misaligned64_constant_large_offsets
+; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
+; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_endpgm
+define void @store_misaligned64_constant_large_offsets() {
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+ ret void
+}
+
+@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
+@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
+
+define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
+ %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
+ %val = load float addrspace(1)* %in
+ %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+ store float %val, float addrspace(3)* %arrayidx44, align 4
+ %add47 = add nsw i32 %x.i, 1
+ %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+ store float %val, float addrspace(3)* %arrayidx48, align 4
+ %add51 = add nsw i32 %x.i, 16
+ %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+ store float %val, float addrspace(3)* %arrayidx52, align 4
+ %add55 = add nsw i32 %x.i, 17
+ %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+ store float %val, float addrspace(3)* %arrayidx56, align 4
+ %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+ store float %val, float addrspace(3)* %arrayidx60, align 4
+ %add63 = add nsw i32 %y.i, 1
+ %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+ store float %val, float addrspace(3)* %arrayidx64, align 4
+ %add67 = add nsw i32 %y.i, 32
+ %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+ store float %val, float addrspace(3)* %arrayidx68, align 4
+ %add71 = add nsw i32 %y.i, 33
+ %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+ store float %val, float addrspace(3)* %arrayidx72, align 4
+ %add75 = add nsw i32 %y.i, 64
+ %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+ store float %val, float addrspace(3)* %arrayidx76, align 4
+ %add79 = add nsw i32 %y.i, 65
+ %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+ store float %val, float addrspace(3)* %arrayidx80, align 4
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
new file mode 100644
index 000000000000..de5f4efcbcd3
--- /dev/null
+++ b/test/CodeGen/R600/ds_write2st64.ll
@@ -0,0 +1,119 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
+
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+
+
+; SI-LABEL: @simple_write2st64_one_val_f32_0_1
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
+ %val = load float addrspace(1)* %in.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ store float %val, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 64
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ store float %val, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2st64_two_val_f32_2_5
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float addrspace(1)* %in.gep.1, align 4
+ %add.x.0 = add nsw i32 %x.i, 128
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x.1 = add nsw i32 %x.i, 320
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2st64_two_val_max_offset_f32
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+ store float %val0, float addrspace(3)* %arrayidx0, align 4
+ %add.x = add nsw i32 %x.i, 16320
+ %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+ store float %val1, float addrspace(3)* %arrayidx1, align 4
+ ret void
+}
+
+; SI-LABEL: @simple_write2st64_two_val_max_offset_f64
+; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]],
+; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
+ %val0 = load double addrspace(1)* %in.gep.0, align 8
+ %val1 = load double addrspace(1)* %in.gep.1, align 8
+ %add.x.0 = add nsw i32 %x.i, 256
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+ store double %val0, double addrspace(3)* %arrayidx0, align 8
+ %add.x.1 = add nsw i32 %x.i, 8128
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+ store double %val1, double addrspace(3)* %arrayidx1, align 8
+ ret void
+}
+
+; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
+; SI-NOT: ds_write2st64_b64
+; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
+; SI: s_endpgm
+define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
+ %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
+ %val = load double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ store double %val, double addrspace(3)* %arrayidx0, align 8
+ %add.x = add nsw i32 %x.i, 8
+ %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ store double %val, double addrspace(3)* %arrayidx1, align 8
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
index 93851504bd4e..ec28ed9c1dcd 100644
--- a/test/CodeGen/R600/elf.ll
+++ b/test/CodeGen/R600/elf.ll
@@ -1,10 +1,16 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s
; ELF-CHECK: Format: ELF32
; ELF-CHECK: Name: .AMDGPU.config
; ELF-CHECK: Type: SHT_PROGBITS
+; ELF-CHECK: Symbol {
+; ELF-CHECK: Name: test
+; ELF-CHECK: Binding: Global
+
+; CONFIG-CHECK: .align 256
+; CONFIG-CHECK: test:
; CONFIG-CHECK: .section .AMDGPU.config
; CONFIG-CHECK-NEXT: .long 45096
; CONFIG-CHECK-NEXT: .long 0
diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/R600/empty-function.ll
new file mode 100644
index 000000000000..4b81d971d06b
--- /dev/null
+++ b/test/CodeGen/R600/empty-function.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; Make sure we don't assert on empty functions
+
+; SI-LABEL: {{^}}empty_function_ret:
+; SI: .text
+; SI: s_endpgm
+; SI: codeLenInByte = 4
+define void @empty_function_ret() #0 {
+ ret void
+}
+
+; SI-LABEL: {{^}}empty_function_unreachable:
+; SI: .text
+; SI: codeLenInByte = 0
+define void @empty_function_unreachable() #0 {
+ unreachable
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index dc056e0ecdd5..4a94acaba0b5 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @anyext_load_i8:
+; FUNC-LABEL: {{^}}anyext_load_i8:
; EG: AND_INT
; EG: 255
define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
@@ -13,7 +13,7 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
ret void
}
-; FUNC-LABEL: @anyext_load_i16:
+; FUNC-LABEL: {{^}}anyext_load_i16:
; EG: AND_INT
; EG: AND_INT
; EG-DAG: 65535
@@ -27,7 +27,7 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
ret void
}
-; FUNC-LABEL: @anyext_load_lds_i8:
+; FUNC-LABEL: {{^}}anyext_load_lds_i8:
; EG: AND_INT
; EG: 255
define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
@@ -39,7 +39,7 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
ret void
}
-; FUNC-LABEL: @anyext_load_lds_i16:
+; FUNC-LABEL: {{^}}anyext_load_lds_i16:
; EG: AND_INT
; EG: AND_INT
; EG-DAG: 65535
@@ -52,72 +52,3 @@ define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 a
store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
ret void
}
-
-; FUNC-LABEL: @sextload_global_i8_to_i64
-; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
-define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in, align 8
- %ext = sext i8 %a to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: @sextload_global_i16_to_i64
-; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
-define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in, align 8
- %ext = sext i16 %a to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: @sextload_global_i32_to_i64
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
-define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32 addrspace(1)* %in, align 8
- %ext = sext i32 %a to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: @zextload_global_i8_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
-define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in, align 8
- %ext = zext i8 %a to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: @zextload_global_i16_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
-define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in, align 8
- %ext = zext i16 %a to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: @zextload_global_i32_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
-define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32 addrspace(1)* %in, align 8
- %ext = zext i32 %a to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll
index 5cd1b04bd1de..04c375a89ae3 100644
--- a/test/CodeGen/R600/extract_vector_elt_i16.ll
+++ b/test/CodeGen/R600/extract_vector_elt_i16.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @extract_vector_elt_v2i16
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
+; FUNC-LABEL: {{^}}extract_vector_elt_v2i16:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_store_short
+; SI: buffer_store_short
define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
%p0 = extractelement <2 x i16> %foo, i32 0
%p1 = extractelement <2 x i16> %foo, i32 1
@@ -14,11 +14,11 @@ define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) no
ret void
}
-; FUNC-LABEL: @extract_vector_elt_v4i16
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
+; FUNC-LABEL: {{^}}extract_vector_elt_v4i16:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_store_short
+; SI: buffer_store_short
define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
%p0 = extractelement <4 x i16> %foo, i32 0
%p1 = extractelement <4 x i16> %foo, i32 2
diff --git a/test/CodeGen/R600/fabs.f64.ll b/test/CodeGen/R600/fabs.f64.ll
new file mode 100644
index 000000000000..d87c08260b4c
--- /dev/null
+++ b/test/CodeGen/R600/fabs.f64.ll
@@ -0,0 +1,97 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+declare double @fabs(double) readnone
+declare double @llvm.fabs.f64(double) readnone
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
+
+; FUNC-LABEL: {{^}}v_fabs_f64:
+; SI: v_and_b32
+; SI: s_endpgm
+define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %tidext = sext i32 %tid to i64
+ %gep = getelementptr double addrspace(1)* %in, i64 %tidext
+ %val = load double addrspace(1)* %gep, align 8
+ %fabs = call double @llvm.fabs.f64(double %val)
+ store double %fabs, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_f64:
+; SI: v_and_b32
+; SI-NOT: v_and_b32
+; SI: s_endpgm
+define void @fabs_f64(double addrspace(1)* %out, double %in) {
+ %fabs = call double @llvm.fabs.f64(double %in)
+ store double %fabs, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_v2f64:
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+ %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
+ store <2 x double> %fabs, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_v4f64:
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+ %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
+ store <4 x double> %fabs, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fabs_fold_f64:
+; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: and
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: s_endpgm
+define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
+ %fabs = call double @llvm.fabs.f64(double %in0)
+ %fmul = fmul double %fabs, %in1
+ store double %fmul, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fabs_fn_fold_f64:
+; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: and
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: s_endpgm
+define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
+ %fabs = call double @fabs(double %in0)
+ %fmul = fmul double %fabs, %in1
+ store double %fmul, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_free_f64:
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+ %bc= bitcast i64 %in to double
+ %fabs = call double @llvm.fabs.f64(double %bc)
+ store double %fabs, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_fn_free_f64:
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+ %bc= bitcast i64 %in to double
+ %fabs = call double @fabs(double %bc)
+ store double %fabs, double addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index b87ce2254095..add6b75d22ae 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -1,65 +1,98 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
; DAGCombiner will transform:
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
; unless isFabsFree returns true
-; R600-CHECK-LABEL: @fabs_free
-; R600-CHECK-NOT: AND
-; R600-CHECK: |PV.{{[XYZW]}}|
-; SI-CHECK-LABEL: @fabs_free
-; SI-CHECK: V_AND_B32
+; FUNC-LABEL: {{^}}fabs_fn_free:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+
+; SI: v_and_b32
+
+define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
+ %bc= bitcast i32 %in to float
+ %fabs = call float @fabs(float %bc)
+ store float %fabs, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_free:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+
+; SI: v_and_b32
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
-entry:
- %0 = bitcast i32 %in to float
- %1 = call float @fabs(float %0)
- store float %1, float addrspace(1)* %out
+ %bc= bitcast i32 %in to float
+ %fabs = call float @llvm.fabs.f32(float %bc)
+ store float %fabs, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+
+; SI: v_and_b32
+define void @fabs_f32(float addrspace(1)* %out, float %in) {
+ %fabs = call float @llvm.fabs.f32(float %in)
+ store float %fabs, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_v2f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+
+; SI: v_and_b32
+; SI: v_and_b32
+define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
+ store <2 x float> %fabs, <2 x float> addrspace(1)* %out
ret void
}
-; R600-CHECK-LABEL: @fabs_v2
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI-CHECK-LABEL: @fabs_v2
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+; FUNC-LABEL: {{^}}fabs_v4f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+ %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+ store <4 x float> %fabs, <4 x float> addrspace(1)* %out
ret void
}
-; R600-CHECK-LABEL: @fabs_v4
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI-CHECK-LABEL: @fabs_v4
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
+; SI-LABEL: {{^}}fabs_fn_fold:
+; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: and
+; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
+ %fabs = call float @fabs(float %in0)
+ %fmul = fmul float %fabs, %in1
+ store float %fmul, float addrspace(1)* %out
ret void
}
-; SI-CHECK-LABEL: @fabs_fold
-; SI-CHECK-NOT: V_AND_B32_e32
-; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
+; SI-LABEL: {{^}}fabs_fold:
+; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: and
+; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
-entry:
- %0 = call float @fabs(float %in0)
- %1 = fmul float %0, %in1
- store float %1, float addrspace(1)* %out
+ %fabs = call float @llvm.fabs.f32(float %in0)
+ %fmul = fmul float %fabs, %in1
+ store float %fmul, float addrspace(1)* %out
ret void
}
-declare float @fabs(float ) readnone
-declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
-declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
+declare float @fabs(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index 5d2b806039a2..9d29c0629628 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -1,66 +1,63 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-; FUNC-LABEL: @fadd_f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI: v_add_f32
define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fadd float %a, %b
- store float %0, float addrspace(1)* %out
+ %add = fadd float %a, %b
+ store float %add, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @fadd_v2f32
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_v2f32:
+; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
+; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
+; SI: v_add_f32
+; SI: v_add_f32
define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
-entry:
- %0 = fadd <2 x float> %a, %b
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ %add = fadd <2 x float> %a, %b
+ store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @fadd_v4f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_v4f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1) * %in
- %b = load <4 x float> addrspace(1) * %b_ptr
+ %a = load <4 x float> addrspace(1)* %in, align 16
+ %b = load <4 x float> addrspace(1)* %b_ptr, align 16
%result = fadd <4 x float> %a, %b
- store <4 x float> %result, <4 x float> addrspace(1)* %out
+ store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; FUNC-LABEL: @fadd_v8f32
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_v8f32:
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
-entry:
- %0 = fadd <8 x float> %a, %b
- store <8 x float> %0, <8 x float> addrspace(1)* %out
+ %add = fadd <8 x float> %a, %b
+ store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
ret void
}
diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll
index 48cd3cfc8dfb..389c754c9e8d 100644
--- a/test/CodeGen/R600/fadd64.ll
+++ b/test/CodeGen/R600/fadd64.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; CHECK: @fadd_f64
-; CHECK: V_ADD_F64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
+; CHECK: {{^}}fadd_f64:
+; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll
index 458363adc1e3..7c7a7e36295c 100644
--- a/test/CodeGen/R600/fceil.ll
+++ b/test/CodeGen/R600/fceil.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.ceil.f32(float) nounwind readnone
@@ -8,8 +8,8 @@ declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
-; FUNC-LABEL: @fceil_f32:
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_f32:
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: CEIL {{\*? *}}[[RESULT]]
define void @fceil_f32(float addrspace(1)* %out, float %x) {
@@ -18,9 +18,9 @@ define void @fceil_f32(float addrspace(1)* %out, float %x) {
ret void
}
-; FUNC-LABEL: @fceil_v2f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v2f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
@@ -30,10 +30,10 @@ define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
ret void
}
-; FUNC-LABEL: @fceil_v3f32:
-; FIXME-SI: V_CEIL_F32_e32
-; FIXME-SI: V_CEIL_F32_e32
-; FIXME-SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v3f32:
+; FIXME-SI: v_ceil_f32_e32
+; FIXME-SI: v_ceil_f32_e32
+; FIXME-SI: v_ceil_f32_e32
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
@@ -46,11 +46,11 @@ define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
ret void
}
-; FUNC-LABEL: @fceil_v4f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v4f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
; EG: CEIL {{\*? *}}[[RESULT]]
; EG: CEIL {{\*? *}}[[RESULT]]
@@ -62,15 +62,15 @@ define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
ret void
}
-; FUNC-LABEL: @fceil_v8f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v8f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
@@ -87,23 +87,23 @@ define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
ret void
}
-; FUNC-LABEL: @fceil_v16f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v16f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll
index b42aefa17328..77cd8eae402c 100644
--- a/test/CodeGen/R600/fceil64.ll
+++ b/test/CodeGen/R600/fceil64.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.ceil.f64(double) nounwind readnone
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
@@ -8,94 +8,95 @@ declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
-; FUNC-LABEL: @fceil_f64:
-; CI: V_CEIL_F64_e32
-; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: S_LSHR_B64
-; SI: S_NOT_B64
-; SI: S_AND_B64
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: CMP_LT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_F64
-; SI: CNDMASK_B32
-; SI: CMP_NE_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: V_ADD_F64
+; FUNC-LABEL: {{^}}fceil_f64:
+; CI: v_ceil_f64_e32
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: v_cmp_lg_f64
+; SI: v_cmp_gt_f64
+; SI: s_and_b64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: v_add_f64
+; SI: s_endpgm
define void @fceil_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.ceil.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @fceil_v2f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v2f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
ret void
}
-; FIXME-FUNC-LABEL: @fceil_v3f64:
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
+; FIXME-FUNC-LABEL: {{^}}fceil_v3f64:
+; FIXME-CI: v_ceil_f64_e32
+; FIXME-CI: v_ceil_f64_e32
+; FIXME-CI: v_ceil_f64_e32
; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; ret void
; }
-; FUNC-LABEL: @fceil_v4f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v4f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @fceil_v8f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v8f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @fceil_v16f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v16f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index c76a75876565..33992181e0d9 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: @fcmp_sext
+; CHECK: {{^}}fcmp_sext:
; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
@@ -18,7 +18,7 @@ entry:
; SET*_DX10 instruction. Previously we were lowering this to:
; SET* + FP_TO_SINT
-; CHECK: @fcmp_br
+; CHECK: {{^}}fcmp_br:
; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}}
; CHECK-NEXT {{[0-9]+(5.0}}
diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
index 8cbe9f686648..032a4e416dc1 100644
--- a/test/CodeGen/R600/fcmp64.ll
+++ b/test/CodeGen/R600/fcmp64.ll
@@ -1,60 +1,55 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; CHECK: @flt_f64
-; CHECK: V_CMP_LT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}flt_f64:
+; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp ult double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fle_f64
-; CHECK: V_CMP_LE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}fle_f64:
+; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp ule double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fgt_f64
-; CHECK: V_CMP_GT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}fgt_f64:
+; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp ugt double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fge_f64
-; CHECK: V_CMP_GE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}fge_f64:
+; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp uge double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: @fne_f64
-; CHECK: V_CMP_NEQ_F64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
+; CHECK-LABEL: {{^}}fne_f64:
+; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
@@ -65,9 +60,8 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
ret void
}
-; CHECK: @feq_f64
-; CHECK: V_CMP_EQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
+; CHECK-LABEL: {{^}}feq_f64:
+; CHECK: v_cmp_nlg_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
index 9c3a7e3d2e93..f3bc399972d5 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/R600/fconst64.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; CHECK: @fconst_f64
-; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
-; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
+; CHECK: {{^}}fconst_f64:
+; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000
+; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r1 = load double addrspace(1)* %in
diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll
index 7b4425bed724..4bc5145bd4de 100644
--- a/test/CodeGen/R600/fcopysign.f32.ll
+++ b/test/CodeGen/R600/fcopysign.f32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@@ -7,15 +7,15 @@ declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind read
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
; Try to identify arg based on higher address.
-; FUNC-LABEL: @test_copysign_f32:
-; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
-; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
-; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
-; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_f32:
+; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
+; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
+; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
; EG: BFI_INT
define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
@@ -24,8 +24,8 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
ret void
}
-; FUNC-LABEL: @test_copysign_v2f32:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v2f32:
+; SI: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
@@ -35,8 +35,8 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
ret void
}
-; FUNC-LABEL: @test_copysign_v4f32:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v4f32:
+; SI: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll
index ea7a6db67f34..a14a493f72c8 100644
--- a/test/CodeGen/R600/fcopysign.f64.ll
+++ b/test/CodeGen/R600/fcopysign.f64.ll
@@ -1,35 +1,35 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.copysign.f64(double, double) nounwind readnone
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
-; FUNC-LABEL: @test_copysign_f64:
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
-; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_f64:
+; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; SI: s_endpgm
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
%result = call double @llvm.copysign.f64(double %mag, double %sign)
store double %result, double addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @test_copysign_v2f64:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v2f64:
+; SI: s_endpgm
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @test_copysign_v4f64:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v4f64:
+; SI: s_endpgm
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
index 20db65c5eb60..f83b88ee1c9d 100644
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -1,18 +1,18 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.
-; FUNC-LABEL: @fdiv_f32
+; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fdiv float %a, %b
@@ -22,16 +22,16 @@ entry:
-; FUNC-LABEL: @fdiv_v2f32
+; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fdiv <2 x float> %a, %b
@@ -39,7 +39,7 @@ entry:
ret void
}
-; FUNC-LABEL: @fdiv_v4f32
+; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -49,14 +49,14 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll
index 79b5c8bb96ee..0611b153f5cc 100644
--- a/test/CodeGen/R600/fdiv64.ll
+++ b/test/CodeGen/R600/fdiv64.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; CHECK: @fdiv_f64
-; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}}
-; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: {{^}}fdiv_f64:
+; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
diff --git a/test/CodeGen/R600/fetch-limits.r600.ll b/test/CodeGen/R600/fetch-limits.r600.ll
index f78d1d968e5d..d35573e818d4 100644
--- a/test/CodeGen/R600/fetch-limits.r600.ll
+++ b/test/CodeGen/R600/fetch-limits.r600.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -march=r600 -mcpu=rv670 | FileCheck %s
; R600 supports 8 fetches in a clause
-; CHECK: @fetch_limits_r600
+; CHECK: {{^}}fetch_limits_r600:
; CHECK: Fetch clause
; CHECK: Fetch clause
diff --git a/test/CodeGen/R600/fetch-limits.r700+.ll b/test/CodeGen/R600/fetch-limits.r700+.ll
index 1a8a43fccc72..17760a05caa4 100644
--- a/test/CodeGen/R600/fetch-limits.r700+.ll
+++ b/test/CodeGen/R600/fetch-limits.r700+.ll
@@ -12,7 +12,7 @@
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
; r700+ supports 16 fetches in a clause
-; CHECK: @fetch_limits_r700
+; CHECK: {{^}}fetch_limits_r700:
; CHECK: Fetch clause
; CHECK: Fetch clause
diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll
index 31c6116988e6..194d0aaa1819 100644
--- a/test/CodeGen/R600/ffloor.ll
+++ b/test/CodeGen/R600/ffloor.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.floor.f64(double) nounwind readnone
declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
@@ -8,95 +8,96 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
-; FUNC-LABEL: @ffloor_f64:
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_f64:
+; CI: v_floor_f64_e32
-; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: S_LSHR_B64
-; SI: S_NOT_B64
-; SI: S_AND_B64
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: CMP_LT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_LT_F64
-; SI: CNDMASK_B32
-; SI: CMP_NE_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: V_ADD_F64
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: v_cmp_lg_f64
+; SI: v_cmp_lt_f64
+; SI: s_and_b64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: v_add_f64
+; SI: s_endpgm
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.floor.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @ffloor_v2f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v2f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
%y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, <2 x double> addrspace(1)* %out
ret void
}
-; FIXME-FUNC-LABEL: @ffloor_v3f64:
-; FIXME-CI: V_FLOOR_F64_e32
-; FIXME-CI: V_FLOOR_F64_e32
-; FIXME-CI: V_FLOOR_F64_e32
+; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
; store <3 x double> %y, <3 x double> addrspace(1)* %out
; ret void
; }
-; FUNC-LABEL: @ffloor_v4f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v4f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
%y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, <4 x double> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @ffloor_v8f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v8f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
%y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, <8 x double> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @ffloor_v16f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v16f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
%y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, <16 x double> addrspace(1)* %out
diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll
new file mode 100644
index 000000000000..99ef41b24e4f
--- /dev/null
+++ b/test/CodeGen/R600/flat-address-space.ll
@@ -0,0 +1,182 @@
+; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
+; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
+
+; Disable optimizations in case there are optimizations added that
+; specialize away generic pointer accesses.
+
+
+; CHECK-LABEL: {{^}}branch_use_flat_i32:
+; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
+; CHECK: s_endpgm
+define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
+entry:
+ %cmp = icmp ne i32 %c, 0
+ br i1 %cmp, label %local, label %global
+
+local:
+ %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
+ br label %end
+
+global:
+ %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+ br label %end
+
+end:
+ %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
+ store i32 %x, i32 addrspace(4)* %fptr, align 4
+; %val = load i32 addrspace(4)* %fptr, align 4
+; store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+
+
+; These testcases might become useless when there are optimizations to
+; remove generic pointers.
+
+; CHECK-LABEL: {{^}}store_flat_i32:
+; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
+; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
+ %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+ store i32 %x, i32 addrspace(4)* %fptr, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_i64:
+; CHECK: flat_store_dwordx2
+define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
+ %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
+ store i64 %x, i64 addrspace(4)* %fptr, align 8
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_v4i32:
+; CHECK: flat_store_dwordx4
+define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
+ %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
+ store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_trunc_i16:
+; CHECK: flat_store_short
+define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+ %y = trunc i32 %x to i16
+ store i16 %y, i16 addrspace(4)* %fptr, align 2
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_trunc_i8:
+; CHECK: flat_store_byte
+define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+ %y = trunc i32 %x to i8
+ store i8 %y, i8 addrspace(4)* %fptr, align 2
+ ret void
+}
+
+
+
+; CHECK-LABEL @load_flat_i32:
+; CHECK: flat_load_dword
+define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
+ %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+ %fload = load i32 addrspace(4)* %fptr, align 4
+ store i32 %fload, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL @load_flat_i64:
+; CHECK: flat_load_dwordx2
+define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
+ %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
+ %fload = load i64 addrspace(4)* %fptr, align 4
+ store i64 %fload, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; CHECK-LABEL @load_flat_v4i32:
+; CHECK: flat_load_dwordx4
+define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
+ %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
+ %fload = load <4 x i32> addrspace(4)* %fptr, align 4
+ store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+; CHECK-LABEL @sextload_flat_i8:
+; CHECK: flat_load_sbyte
+define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+ %fload = load i8 addrspace(4)* %fptr, align 4
+ %ext = sext i8 %fload to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL @zextload_flat_i8:
+; CHECK: flat_load_ubyte
+define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
+ %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+ %fload = load i8 addrspace(4)* %fptr, align 4
+ %ext = zext i8 %fload to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL @sextload_flat_i16:
+; CHECK: flat_load_sshort
+define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+ %fload = load i16 addrspace(4)* %fptr, align 4
+ %ext = sext i16 %fload to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL @zextload_flat_i16:
+; CHECK: flat_load_ushort
+define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
+ %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+ %fload = load i16 addrspace(4)* %fptr, align 4
+ %ext = zext i16 %fload to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+
+
+; TODO: This should not be zero when registers are used for small
+; scratch allocations again.
+
+; Check for prologue initializing special SGPRs pointing to scratch.
+; CHECK-LABEL: {{^}}store_flat_scratch:
+; CHECK: s_movk_i32 flat_scratch_lo, 0
+; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
+; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
+; CHECK: flat_store_dword
+; CHECK: s_barrier
+; CHECK: flat_load_dword
+define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
+ %alloca = alloca i32, i32 9, align 4
+ %x = call i32 @llvm.r600.read.tidig.x() #3
+ %pptr = getelementptr i32* %alloca, i32 %x
+ %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
+ store i32 %x, i32 addrspace(4)* %fptr
+ ; Dummy call
+ call void @llvm.AMDGPU.barrier.local() #1
+ %reload = load i32 addrspace(4)* %fptr, align 4
+ store i32 %reload, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+declare void @llvm.AMDGPU.barrier.local() #1
+declare i32 @llvm.r600.read.tidig.x() #3
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind noduplicate }
+attributes #3 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll
new file mode 100644
index 000000000000..48b1093ecd0b
--- /dev/null
+++ b/test/CodeGen/R600/fma.f64.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+
+; FUNC-LABEL: {{^}}fma_f64:
+; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2, double addrspace(1)* %in3) {
+ %r0 = load double addrspace(1)* %in1
+ %r1 = load double addrspace(1)* %in2
+ %r2 = load double addrspace(1)* %in3
+ %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
+ store double %r3, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fma_v2f64:
+; SI: v_fma_f64
+; SI: v_fma_f64
+define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+ <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
+ %r0 = load <2 x double> addrspace(1)* %in1
+ %r1 = load <2 x double> addrspace(1)* %in2
+ %r2 = load <2 x double> addrspace(1)* %in3
+ %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
+ store <2 x double> %r3, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fma_v4f64:
+; SI: v_fma_f64
+; SI: v_fma_f64
+; SI: v_fma_f64
+; SI: v_fma_f64
+define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
+ <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
+ %r0 = load <4 x double> addrspace(1)* %in1
+ %r1 = load <4 x double> addrspace(1)* %in2
+ %r2 = load <4 x double> addrspace(1)* %in3
+ %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
+ store <4 x double> %r3, <4 x double> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll
index d72ffeceb921..f3861ffa2835 100644
--- a/test/CodeGen/R600/fma.ll
+++ b/test/CodeGen/R600/fma.ll
@@ -1,89 +1,92 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-declare double @llvm.fma.f64(double, double, double) nounwind readnone
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @fma_f32
-; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+; FUNC-LABEL: {{^}}fma_f32:
+; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}},
+; EG: FMA {{\*? *}}[[RES]]
define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
- %r0 = load float addrspace(1)* %in1
- %r1 = load float addrspace(1)* %in2
- %r2 = load float addrspace(1)* %in3
- %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
- store float %r3, float addrspace(1)* %out
- ret void
+ %r0 = load float addrspace(1)* %in1
+ %r1 = load float addrspace(1)* %in2
+ %r2 = load float addrspace(1)* %in3
+ %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
+ store float %r3, float addrspace(1)* %out
+ ret void
}
-; FUNC-LABEL: @fma_v2f32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
+; FUNC-LABEL: {{^}}fma_v2f32:
+; SI: v_fma_f32
+; SI: v_fma_f32
+
+; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].[[CHLO:[XYZW]]][[CHHI:[XYZW]]], {{T[0-9]\.[XYZW]}},
+; EG-DAG: FMA {{\*? *}}[[RES]].[[CHLO]]
+; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
<2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
- %r0 = load <2 x float> addrspace(1)* %in1
- %r1 = load <2 x float> addrspace(1)* %in2
- %r2 = load <2 x float> addrspace(1)* %in3
- %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
- store <2 x float> %r3, <2 x float> addrspace(1)* %out
- ret void
+ %r0 = load <2 x float> addrspace(1)* %in1
+ %r1 = load <2 x float> addrspace(1)* %in2
+ %r2 = load <2 x float> addrspace(1)* %in3
+ %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
+ store <2 x float> %r3, <2 x float> addrspace(1)* %out
+ ret void
}
-; FUNC-LABEL: @fma_v4f32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
+; FUNC-LABEL: {{^}}fma_v4f32:
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+
+; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].{{[XYZW][XYZW][XYZW][XYZW]}}, {{T[0-9]\.[XYZW]}},
+; EG-DAG: FMA {{\*? *}}[[RES]].X
+; EG-DAG: FMA {{\*? *}}[[RES]].Y
+; EG-DAG: FMA {{\*? *}}[[RES]].Z
+; EG-DAG: FMA {{\*? *}}[[RES]].W
define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
<4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
- %r0 = load <4 x float> addrspace(1)* %in1
- %r1 = load <4 x float> addrspace(1)* %in2
- %r2 = load <4 x float> addrspace(1)* %in3
- %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
- store <4 x float> %r3, <4 x float> addrspace(1)* %out
- ret void
+ %r0 = load <4 x float> addrspace(1)* %in1
+ %r1 = load <4 x float> addrspace(1)* %in2
+ %r2 = load <4 x float> addrspace(1)* %in3
+ %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
+ store <4 x float> %r3, <4 x float> addrspace(1)* %out
+ ret void
}
-; FUNC-LABEL: @fma_f64
-; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
- double addrspace(1)* %in2, double addrspace(1)* %in3) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = load double addrspace(1)* %in3
- %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
- store double %r3, double addrspace(1)* %out
- ret void
-}
+; FUNC-LABEL: @fma_commute_mul_inline_imm_f32
+; SI: v_fma_f32 {{v[0-9]+}}, 2.0, {{v[0-9]+}}, {{v[0-9]+}}
+define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
-; FUNC-LABEL: @fma_v2f64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
- <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
- %r0 = load <2 x double> addrspace(1)* %in1
- %r1 = load <2 x double> addrspace(1)* %in2
- %r2 = load <2 x double> addrspace(1)* %in3
- %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
- store <2 x double> %r3, <2 x double> addrspace(1)* %out
- ret void
+ %a = load float addrspace(1)* %in.a.gep, align 4
+ %b = load float addrspace(1)* %in.b.gep, align 4
+
+ %fma = call float @llvm.fma.f32(float %a, float 2.0, float %b)
+ store float %fma, float addrspace(1)* %out.gep, align 4
+ ret void
}
-; FUNC-LABEL: @fma_v4f64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
- <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
- %r0 = load <4 x double> addrspace(1)* %in1
- %r1 = load <4 x double> addrspace(1)* %in2
- %r2 = load <4 x double> addrspace(1)* %in3
- %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
- store <4 x double> %r3, <4 x double> addrspace(1)* %out
- ret void
+; FUNC-LABEL: @fma_commute_mul_s_f32
+define void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %a = load float addrspace(1)* %in.a.gep, align 4
+ %c = load float addrspace(1)* %in.b.gep, align 4
+
+ %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
+ store float %fma, float addrspace(1)* %out.gep, align 4
+ ret void
}
diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll
new file mode 100644
index 000000000000..6f95bf20f73b
--- /dev/null
+++ b/test/CodeGen/R600/fmax3.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmax3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Commute operand of second fmax
+; SI-LABEL: {{^}}test_fmax3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/fmax_legacy.f64.ll b/test/CodeGen/R600/fmax_legacy.f64.ll
new file mode 100644
index 000000000000..a615825a45d3
--- /dev/null
+++ b/test/CodeGen/R600/fmax_legacy.f64.ll
@@ -0,0 +1,67 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; Make sure we don't try to form FMAX_LEGACY nodes with f64
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmax_legacy_uge_f64
+define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp uge double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_oge_f64
+define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp oge double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ugt_f64
+define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ugt double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f64
+define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ogt double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmax_legacy.ll b/test/CodeGen/R600/fmax_legacy.ll
new file mode 100644
index 000000000000..46f0e9831e6a
--- /dev/null
+++ b/test/CodeGen/R600/fmax_legacy.ll
@@ -0,0 +1,116 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
+; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: Should replace unsafe-fp-math with no signed zeros.
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmax_legacy_uge_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+
+; EG: MAX
+define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp uge float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_oge_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; EG: MAX
+define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp oge float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ugt_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; EG: MAX
+define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp ugt float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; EG: MAX
+define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp ogt float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-NOT: v_max_
+; SI: v_cmp_gt_f32
+; SI-NEXT: v_cndmask_b32
+; SI-NOT: v_max_
+
+; EG: MAX
+define void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp ogt float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out0, align 4
+ store i1 %cmp, i1addrspace(1)* %out1
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmaxnum.f64.ll b/test/CodeGen/R600/fmaxnum.f64.ll
new file mode 100644
index 000000000000..e92996aa2b1f
--- /dev/null
+++ b/test/CodeGen/R600/fmaxnum.f64.ll
@@ -0,0 +1,75 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.maxnum.f64(double, double) #0
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
+declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) #0
+declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) #0
+declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
+
+; FUNC-LABEL: @test_fmax_f64
+; SI: v_max_f64
+define void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
+ %val = call double @llvm.maxnum.f64(double %a, double %b) #0
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v2f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
+ %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
+ store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v4f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
+ %val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
+ store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v8f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
+ %val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
+ store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v16f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
+ %val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
+ store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll
new file mode 100644
index 000000000000..473184af214b
--- /dev/null
+++ b/test/CodeGen/R600/fmaxnum.ll
@@ -0,0 +1,191 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.maxnum.f32(float, float) #0
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0
+declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0
+
+declare double @llvm.maxnum.f64(double, double)
+
+; FUNC-LABEL: @test_fmax_f32
+; SI: v_max_f32_e32
+define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %val = call float @llvm.maxnum.f32(float %a, float %b) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v2f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
+ %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
+ store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v4f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
+ %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
+ store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v8f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
+ %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
+ store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_v16f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
+ %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
+ store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmax_var_immediate_f32
+; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmax_immediate_var_f32
+; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmax_var_literal_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmax_literal_var_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll
new file mode 100644
index 000000000000..aeeed1c7dd39
--- /dev/null
+++ b/test/CodeGen/R600/fmin3.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmin3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Commute operand of second fmin
+; SI-LABEL: {{^}}test_fmin3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %c = load float addrspace(1)* %cptr, align 4
+ %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+ %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
+ store float %f1, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/fmin_legacy.f64.ll b/test/CodeGen/R600/fmin_legacy.f64.ll
new file mode 100644
index 000000000000..51dcd06f9397
--- /dev/null
+++ b/test/CodeGen/R600/fmin_legacy.f64.ll
@@ -0,0 +1,77 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmin_legacy_f64
+define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 {
+ %r0 = extractelement <4 x double> %reg0, i32 0
+ %r1 = extractelement <4 x double> %reg0, i32 1
+ %r2 = fcmp uge double %r0, %r1
+ %r3 = select i1 %r2, double %r1, double %r0
+ %vec = insertelement <4 x double> undef, double %r3, i32 0
+ store <4 x double> %vec, <4 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ule_f64
+define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ule double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f64
+define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ole double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_olt_f64
+define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp olt double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ult_f64
+define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ult double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmin_legacy.ll b/test/CodeGen/R600/fmin_legacy.ll
new file mode 100644
index 000000000000..5014f6c55329
--- /dev/null
+++ b/test/CodeGen/R600/fmin_legacy.ll
@@ -0,0 +1,123 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: Should replace unsafe-fp-math with no signed zeros.
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmin_legacy_f32
+; EG: MIN *
+; SI-SAFE: v_min_legacy_f32_e32
+; SI-NONAN: v_min_f32_e32
+define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
+ %r2 = fcmp uge float %r0, %r1
+ %r3 = select i1 %r2, float %r1, float %r0
+ %vec = insertelement <4 x float> undef, float %r3, i32 0
+ store <4 x float> %vec, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ule_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp ule float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp ole float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_olt_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp olt float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ult_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp ult float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-NOT: v_min
+; SI: v_cmp_le_f32
+; SI-NEXT: v_cndmask_b32
+; SI-NOT: v_min
+; SI: s_endpgm
+define void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %cmp = fcmp ole float %a, %b
+ %val0 = select i1 %cmp, float %a, float %b
+ store float %val0, float addrspace(1)* %out0, align 4
+ store i1 %cmp, i1 addrspace(1)* %out1
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fminnum.f64.ll b/test/CodeGen/R600/fminnum.f64.ll
new file mode 100644
index 000000000000..b8476f98bab8
--- /dev/null
+++ b/test/CodeGen/R600/fminnum.f64.ll
@@ -0,0 +1,75 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.minnum.f64(double, double) #0
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
+declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) #0
+declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
+declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
+
+; FUNC-LABEL: @test_fmin_f64
+; SI: v_min_f64
+define void @test_fmin_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
+ %val = call double @llvm.minnum.f64(double %a, double %b) #0
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v2f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
+ %val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
+ store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v4f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
+ %val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
+ store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v8f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
+ %val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
+ store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v16f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
+ %val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
+ store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll
new file mode 100644
index 000000000000..cd1a948707e8
--- /dev/null
+++ b/test/CodeGen/R600/fminnum.ll
@@ -0,0 +1,189 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.minnum.f32(float, float) #0
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #0
+declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
+
+; FUNC-LABEL: @test_fmin_f32
+; SI: v_min_f32_e32
+define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %val = call float @llvm.minnum.f32(float %a, float %b) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v2f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
+ %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
+ store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v4f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
+ %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
+ store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v8f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
+ %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
+ store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_v16f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
+ %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
+ store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_nan_nan
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_val_nan
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_nan_val
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_p0_p0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_p0_n0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_n0_p0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_n0_n0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
+ %val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmin_var_immediate_f32
+; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.minnum.f32(float %a, float 2.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmin_immediate_var_f32
+; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.minnum.f32(float 2.0, float %a) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmin_var_literal_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.minnum.f32(float %a, float 99.0) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @fmin_literal_var_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
+ %val = call float @llvm.minnum.f32(float 99.0, float %a) #0
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index 2a7825f9ecf7..7296a8760be2 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; R600-CHECK: @fmul_f32
-; R600-CHECK: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK: @fmul_f32
-; SI-CHECK: V_MUL_F32
+
+; FUNC-LABEL: {{^}}fmul_f32:
+; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
+
+; SI: v_mul_f32
define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fmul float %a, %b
@@ -16,12 +17,12 @@ declare float @llvm.R600.load.input(i32) readnone
declare void @llvm.AMDGPU.store.output(float, i32)
-; R600-CHECK: @fmul_v2f32
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
-; SI-CHECK: @fmul_v2f32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
+; FUNC-LABEL: {{^}}fmul_v2f32:
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+
+; SI: v_mul_f32
+; SI: v_mul_f32
define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fmul <2 x float> %a, %b
@@ -29,16 +30,16 @@ entry:
ret void
}
-; R600-CHECK: @fmul_v4f32
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: @fmul_v4f32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
+; FUNC-LABEL: {{^}}fmul_v4f32:
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in
@@ -47,3 +48,28 @@ define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: {{^}}test_mul_2_k:
+; SI: v_mul_f32
+; SI-NOT: v_mul_f32
+; SI: s_endpgm
+define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
+ %y = fmul float %x, 2.0
+ %z = fmul float %y, 3.0
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_mul_2_k_inv:
+; SI: v_mul_f32
+; SI-NOT: v_mul_f32
+; SI-NOT: v_mad_f32
+; SI: s_endpgm
+define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
+ %y = fmul float %x, 3.0
+ %z = fmul float %y, 2.0
+ store float %z, float addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll
index 7c7bf041496b..882307ef458b 100644
--- a/test/CodeGen/R600/fmul64.ll
+++ b/test/CodeGen/R600/fmul64.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-
-; CHECK: @fmul_f64
-; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+; FUNC-LABEL: {{^}}fmul_f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
@@ -11,3 +10,29 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
store double %r2, double addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: {{^}}fmul_v2f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+ <2 x double> addrspace(1)* %in2) {
+ %r0 = load <2 x double> addrspace(1)* %in1
+ %r1 = load <2 x double> addrspace(1)* %in2
+ %r2 = fmul <2 x double> %r0, %r1
+ store <2 x double> %r2, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fmul_v4f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
+ <4 x double> addrspace(1)* %in2) {
+ %r0 = load <4 x double> addrspace(1)* %in1
+ %r1 = load <4 x double> addrspace(1)* %in2
+ %r2 = fmul <4 x double> %r0, %r1
+ store <4 x double> %r2, <4 x double> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll
index 48944f629482..2b708639b122 100644
--- a/test/CodeGen/R600/fmuladd.ll
+++ b/test/CodeGen/R600/fmuladd.ll
@@ -1,7 +1,12 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-; CHECK: @fmuladd_f32
-; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+declare float @llvm.fmuladd.f32(float, float, float)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+; CHECK-LABEL: {{^}}fmuladd_f32:
+; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
@@ -13,10 +18,8 @@ define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
ret void
}
-declare float @llvm.fmuladd.f32(float, float, float)
-
-; CHECK: @fmuladd_f64
-; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; CHECK-LABEL: {{^}}fmuladd_f64:
+; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
@@ -28,4 +31,169 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
ret void
}
-declare double @llvm.fmuladd.f64(double, double, double)
+; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fadd_a_a_b_f32(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r0 = load float addrspace(1)* %gep.0
+ %r1 = load float addrspace(1)* %gep.1
+
+ %add.0 = fadd float %r0, %r0
+ %add.1 = fadd float %add.0, %r1
+ store float %add.1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fadd_b_a_a_f32(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r0 = load float addrspace(1)* %gep.0
+ %r1 = load float addrspace(1)* %gep.1
+
+ %add.0 = fadd float %r0, %r0
+ %add.1 = fadd float %r1, %add.0
+ store float %add.1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+
+; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %r1.fneg = fsub float -0.000000e+00, %r1
+
+ %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+
+; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %r1.fneg = fsub float -0.000000e+00, %r1
+
+ %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+
+; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %r2.fneg = fsub float -0.000000e+00, %r2
+
+ %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll
index 1c1d7315189f..30bc67689e1c 100644
--- a/test/CodeGen/R600/fnearbyint.ll
+++ b/test/CodeGen/R600/fnearbyint.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s
-; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
; This should have the exactly the same output as the test for rint,
; so no need to check anything.
diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll
new file mode 100644
index 000000000000..04a87e377857
--- /dev/null
+++ b/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -0,0 +1,101 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FIXME: Check something here. Currently it seems fabs + fneg aren't
+; into 2 modifiers, although theoretically that should work.
+
+; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
+; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
+define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
+ %fabs = call double @llvm.fabs.f64(double %x)
+ %fsub = fsub double -0.000000e+00, %fabs
+ %fadd = fadd double %y, %fsub
+ store double %fadd, double addrspace(1)* %out, align 8
+ ret void
+}
+
+define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
+ %x = load double addrspace(1)* %xptr, align 8
+ %y = load double addrspace(1)* %xptr, align 8
+ %fabs = call double @llvm.fabs.f64(double %x)
+ %fsub = fsub double -0.000000e+00, %fabs
+ %fadd = fadd double %y, %fsub
+ store double %fadd, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fmul_f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
+define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
+ %fabs = call double @llvm.fabs.f64(double %x)
+ %fsub = fsub double -0.000000e+00, %fabs
+ %fmul = fmul double %y, %fsub
+ store double %fmul, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_free_f64:
+define void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+ %bc = bitcast i64 %in to double
+ %fabs = call double @llvm.fabs.f64(double %bc)
+ %fsub = fsub double -0.000000e+00, %fabs
+ store double %fsub, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+ %bc = bitcast i64 %in to double
+ %fabs = call double @fabs(double %bc)
+ %fsub = fsub double -0.000000e+00, %fabs
+ store double %fsub, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_f64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
+; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
+define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
+ %fabs = call double @llvm.fabs.f64(double %in)
+ %fsub = fsub double -0.000000e+00, %fabs
+ store double %fsub, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_v2f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+ %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
+ %fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
+ store <2 x double> %fsub, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_v4f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+ %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
+ %fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
+ store <4 x double> %fsub, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+declare double @fabs(double) readnone
+declare double @llvm.fabs.f64(double) readnone
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll
index d95e1311bc10..94e8256cd261 100644
--- a/test/CodeGen/R600/fneg-fabs.ll
+++ b/test/CodeGen/R600/fneg-fabs.ll
@@ -1,55 +1,117 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
+; SI-NOT: and
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %fsub = fsub float -0.000000e+00, %fabs
+ %fadd = fadd float %y, %fsub
+ store float %fadd, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
+; SI-NOT: and
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
+; SI-NOT: and
+define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %fsub = fsub float -0.000000e+00, %fabs
+ %fmul = fmul float %y, %fsub
+ store float %fmul, float addrspace(1)* %out, align 4
+ ret void
+}
; DAGCombiner will transform:
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
; unless isFabsFree returns true
-; R600-CHECK-LABEL: @fneg_fabs_free
-; R600-CHECK-NOT: AND
-; R600-CHECK: |PV.{{[XYZW]}}|
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_fabs_free
-; SI-CHECK: V_OR_B32
-
-define void @fneg_fabs_free(float addrspace(1)* %out, i32 %in) {
-entry:
- %0 = bitcast i32 %in to float
- %1 = call float @fabs(float %0)
- %2 = fsub float -0.000000e+00, %1
- store float %2, float addrspace(1)* %out
- ret void
-}
-
-; R600-CHECK-LABEL: @fneg_fabs_v2
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: -PV
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_fabs_v2
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-define void @fneg_fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
- %1 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0
- store <2 x float> %1, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; SI-CHECK-LABEL: @fneg_fabs_v4
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-define void @fneg_fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
- %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
- store <4 x float> %1, <4 x float> addrspace(1)* %out
- ret void
-}
-
-declare float @fabs(float ) readnone
-declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
-declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
+; FUNC-LABEL: {{^}}fneg_fabs_free_f32:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+; R600: -PV
+
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
+ %bc = bitcast i32 %in to float
+ %fabs = call float @llvm.fabs.f32(float %bc)
+ %fsub = fsub float -0.000000e+00, %fabs
+ store float %fsub, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f32:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+; R600: -PV
+
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
+ %bc = bitcast i32 %in to float
+ %fabs = call float @fabs(float %bc)
+ %fsub = fsub float -0.000000e+00, %fabs
+ store float %fsub, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_f32:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
+ %fabs = call float @llvm.fabs.f32(float %in)
+ %fsub = fsub float -0.000000e+00, %fabs
+ store float %fsub, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %val = load float addrspace(1)* %in, align 4
+ %fabs = call float @llvm.fabs.f32(float %val)
+ %fsub = fsub float -0.000000e+00, %fabs
+ store float %fsub, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_v2f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: -PV
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: -PV
+
+; FIXME: SGPR should be used directly for first src operand.
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
+ %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
+ store <2 x float> %fsub, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; FIXME: SGPR should be used directly for first src operand.
+; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+ %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+ %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
+ store <4 x float> %fsub, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+declare float @fabs(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll
new file mode 100644
index 000000000000..eb2eb08b88b1
--- /dev/null
+++ b/test/CodeGen/R600/fneg.f64.ll
@@ -0,0 +1,59 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}fneg_f64:
+; SI: v_xor_b32
+define void @fneg_f64(double addrspace(1)* %out, double %in) {
+ %fneg = fsub double -0.000000e+00, %in
+ store double %fneg, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_v2f64:
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
+ %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
+ store <2 x double> %fneg, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_v4f64:
+; R600: -PV
+; R600: -T
+; R600: -PV
+; R600: -PV
+
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
+ %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
+ store <4 x double> %fneg, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; DAGCombiner will transform:
+; (fneg (f64 bitcast (i64 a))) => (f64 bitcast (xor (i64 a), 0x80000000))
+; unless the target returns true for isNegFree()
+
+; FUNC-LABEL: {{^}}fneg_free_f64:
+; FIXME: Unnecessary copy to VGPRs
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
+define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
+ %bc = bitcast i64 %in to double
+ %fsub = fsub double 0.0, %bc
+ store double %fsub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fneg_fold_f64:
+; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: xor
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
+ %fsub = fsub double -0.0, %in
+ %fmul = fmul double %fsub, %in
+ store double %fmul, double addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index 4cddc7378956..ca3350dd7f48 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -1,44 +1,41 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; R600-CHECK-LABEL: @fneg
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg
-; SI-CHECK: V_XOR_B32
-define void @fneg(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fsub float -0.000000e+00, %in
- store float %0, float addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_f32:
+; R600: -PV
+
+; SI: v_xor_b32
+define void @fneg_f32(float addrspace(1)* %out, float %in) {
+ %fneg = fsub float -0.000000e+00, %in
+ store float %fneg, float addrspace(1)* %out
ret void
}
-; R600-CHECK-LABEL: @fneg_v2
-; R600-CHECK: -PV
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_v2
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
-entry:
- %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_v2f32:
+; R600: -PV
+; R600: -PV
+
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
+ %fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
+ store <2 x float> %fneg, <2 x float> addrspace(1)* %out
ret void
}
-; R600-CHECK-LABEL: @fneg_v4
-; R600-CHECK: -PV
-; R600-CHECK: -T
-; R600-CHECK: -PV
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_v4
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
-entry:
- %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
- store <4 x float> %0, <4 x float> addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_v4f32:
+; R600: -PV
+; R600: -T
+; R600: -PV
+; R600: -PV
+
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
+ %fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
+ store <4 x float> %fneg, <4 x float> addrspace(1)* %out
ret void
}
@@ -46,27 +43,26 @@ entry:
; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
; unless the target returns true for isNegFree()
-; R600-CHECK-LABEL: @fneg_free
-; R600-CHECK-NOT: XOR
-; R600-CHECK: -KC0[2].Z
-; SI-CHECK-LABEL: @fneg_free
-; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
-; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0
-define void @fneg_free(float addrspace(1)* %out, i32 %in) {
-entry:
- %0 = bitcast i32 %in to float
- %1 = fsub float 0.0, %0
- store float %1, float addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_free_f32:
+; R600-NOT: XOR
+; R600: -KC0[2].Z
+
+; XXX: We could use v_add_f32_e64 with the negate bit here instead.
+; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
+define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
+ %bc = bitcast i32 %in to float
+ %fsub = fsub float 0.0, %bc
+ store float %fsub, float addrspace(1)* %out
ret void
}
-; SI-CHECK-LABEL: @fneg_fold
-; SI-CHECK-NOT: V_XOR_B32
-; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
-define void @fneg_fold(float addrspace(1)* %out, float %in) {
-entry:
- %0 = fsub float -0.0, %in
- %1 = fmul float %0, %in
- store float %1, float addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_fold_f32:
+; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: xor
+; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
+ %fsub = fsub float -0.0, %in
+ %fmul = fmul float %fsub, %in
+ store float %fmul, float addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll
new file mode 100644
index 000000000000..a1b2f08eddeb
--- /dev/null
+++ b/test/CodeGen/R600/fp-classify.ll
@@ -0,0 +1,130 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
+declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
+declare i32 @llvm.r600.read.tidig.x() #1
+declare float @llvm.fabs.f32(float) #1
+declare double @llvm.fabs.f64(double) #1
+
+; SI-LABEL: {{^}}test_isinf_pattern:
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x204{{$}}
+; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; SI-NOT: v_cmp
+; SI: s_endpgm
+define void @test_isinf_pattern(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %cmp = fcmp oeq float %fabs, 0x7FF0000000000000
+ %ext = zext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_not_isinf_pattern_0:
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_not_isinf_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %cmp = fcmp ueq float %fabs, 0x7FF0000000000000
+ %ext = zext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_not_isinf_pattern_1:
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %cmp = fcmp oeq float %fabs, 0xFFF0000000000000
+ %ext = zext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_isfinite_pattern_0:
+; SI-NOT: v_cmp
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}}
+; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; SI-NOT: v_cmp
+; SI: s_endpgm
+define void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %ord = fcmp ord float %x, 0.000000e+00
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+ %and = and i1 %ord, %ninf
+ %ext = zext i1 %and to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Use negative infinity
+; SI-LABEL: {{^}}test_isfinite_not_pattern_0:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %ord = fcmp ord float %x, 0.000000e+00
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %ninf = fcmp une float %x.fabs, 0xFFF0000000000000
+ %and = and i1 %ord, %ninf
+ %ext = zext i1 %and to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; No fabs
+; SI-LABEL: {{^}}test_isfinite_not_pattern_1:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %ord = fcmp ord float %x, 0.000000e+00
+ %ninf = fcmp une float %x, 0x7FF0000000000000
+ %and = and i1 %ord, %ninf
+ %ext = zext i1 %and to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; fabs of different value
+; SI-LABEL: {{^}}test_isfinite_not_pattern_2:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocapture %out, float %x, float %y) #0 {
+ %ord = fcmp ord float %x, 0.000000e+00
+ %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
+ %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+ %and = and i1 %ord, %ninf
+ %ext = zext i1 %and to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Wrong ordered compare type
+; SI-LABEL: {{^}}test_isfinite_not_pattern_3:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %ord = fcmp uno float %x, 0.000000e+00
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+ %and = and i1 %ord, %ninf
+ %ext = zext i1 %and to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Wrong unordered compare
+; SI-LABEL: {{^}}test_isfinite_not_pattern_4:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_4(i32 addrspace(1)* nocapture %out, float %x) #0 {
+ %ord = fcmp ord float %x, 0.000000e+00
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
+ %and = and i1 %ord, %ninf
+ %ext = zext i1 %and to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/R600/fp16_to_fp.ll
index 777eadc34ead..be84582a73a6 100644
--- a/test/CodeGen/R600/fp16_to_fp.ll
+++ b/test/CodeGen/R600/fp16_to_fp.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
-; SI-LABEL: @test_convert_fp16_to_fp32:
-; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
-; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI-LABEL: {{^}}test_convert_fp16_to_fp32:
+; SI: buffer_load_ushort [[VAL:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]]
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
@@ -15,11 +15,11 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
}
-; SI-LABEL: @test_convert_fp16_to_fp64:
-; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
-; SI: V_CVT_F32_F16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
-; SI: V_CVT_F64_F32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI-LABEL: {{^}}test_convert_fp16_to_fp64:
+; SI: buffer_load_ushort [[VAL:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
+; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
+; SI: buffer_store_dwordx2 [[RESULT]]
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll
index 6b5ff00b5f60..43dd09b5ec05 100644
--- a/test/CodeGen/R600/fp32_to_fp16.ll
+++ b/test/CodeGen/R600/fp32_to_fp16.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
-; SI-LABEL: @test_convert_fp32_to_fp16:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI-LABEL: {{^}}test_convert_fp32_to_fp16:
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_short [[RESULT]]
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 4
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
diff --git a/test/CodeGen/R600/fp64_to_sint.ll b/test/CodeGen/R600/fp64_to_sint.ll
deleted file mode 100644
index 185e21c9caa3..000000000000
--- a/test/CodeGen/R600/fp64_to_sint.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
-
-; CHECK: @fp64_to_sint
-; CHECK: V_CVT_I32_F64_e32
-define void @fp64_to_sint(i32 addrspace(1)* %out, double %in) {
- %result = fptosi double %in to i32
- store i32 %result, i32 addrspace(1)* %out
- ret void
-}
diff --git a/test/CodeGen/R600/fp_to_sint.f64.ll b/test/CodeGen/R600/fp_to_sint.f64.ll
new file mode 100644
index 000000000000..e6418477a9b4
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_sint.f64.ll
@@ -0,0 +1,56 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @fp_to_sint_f64_i32
+; SI: v_cvt_i32_f64_e32
+define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
+ %result = fptosi double %in to i32
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @fp_to_sint_v2f64_v2i32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
+ %result = fptosi <2 x double> %in to <2 x i32>
+ store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @fp_to_sint_v4f64_v4i32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
+ %result = fptosi <4 x double> %in to <4 x i32>
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @fp_to_sint_i64_f64
+; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
+; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
+; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
+
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
+
+; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
+
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
+; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
+; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %val = load double addrspace(1)* %gep, align 8
+ %cast = fptosi double %val to i64
+ store i64 %cast, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index 235045aaaaaa..35cfb03d39b4 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -1,26 +1,36 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; FUNC-LABEL: @fp_to_sint_v2i32
+; FUNC-LABEL: {{^}}fp_to_sint_i32:
+; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; SI: v_cvt_i32_f32_e32
+; SI: s_endpgm
+define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
+ %conv = fptosi float %in to i32
+ store i32 %conv, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptosi <2 x float> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @fp_to_sint_v4i32
+; FUNC-LABEL: {{^}}fp_to_sint_v4i32:
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
%result = fptosi <4 x float> %value to <4 x i32>
@@ -28,7 +38,7 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
ret void
}
-; FUNC-LABEL: @fp_to_sint_i64
+; FUNC-LABEL: {{^}}fp_to_sint_i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
@@ -49,11 +59,11 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
; EG-DAG: XOR_INT
; EG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; Check that the compiler doesn't crash with a "cannot select" error
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
entry:
%0 = fptosi float %in to i64
@@ -61,7 +71,7 @@ entry:
ret void
}
-; FUNC: @fp_to_sint_v2i64
+; FUNC: {{^}}fp_to_sint_v2i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -81,8 +91,8 @@ entry:
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -102,17 +112,17 @@ entry:
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptosi <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
ret void
}
-; FUNC: @fp_to_sint_v4i64
+; FUNC: {{^}}fp_to_sint_v4i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -132,8 +142,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -153,8 +163,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -174,8 +184,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -195,10 +205,10 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
+; SI: s_endpgm
define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptosi <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll
index bf607cef0884..1ffe2faadf33 100644
--- a/test/CodeGen/R600/fp_to_uint.f64.ll
+++ b/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -1,9 +1,70 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; SI-LABEL: @fp_to_uint_i32_f64
-; SI: V_CVT_U32_F64_e32
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: {{^}}fp_to_uint_i32_f64:
+; SI: v_cvt_u32_f64_e32
define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
%cast = fptoui double %in to i32
store i32 %cast, i32 addrspace(1)* %out, align 4
ret void
}
+
+; SI-LABEL: @fp_to_uint_v2i32_v2f64
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+define void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
+ %cast = fptoui <2 x double> %in to <2 x i32>
+ store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: @fp_to_uint_v4i32_v4f64
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
+ %cast = fptoui <4 x double> %in to <4 x i32>
+ store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @fp_to_uint_i64_f64
+; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
+; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
+; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
+
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
+
+; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
+
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
+; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %val = load double addrspace(1)* %gep, align 8
+ %cast = fptoui double %val to i64
+ store i64 %cast, i64 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @fp_to_uint_v2i64_v2f64
+define void @fp_to_uint_v2i64_v2f64(<2 x i64> addrspace(1)* %out, <2 x double> %in) {
+ %cast = fptoui <2 x double> %in to <2 x i64>
+ store <2 x i64> %cast, <2 x i64> addrspace(1)* %out, align 16
+ ret void
+}
+
+; SI-LABEL: @fp_to_uint_v4i64_v4f64
+define void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> %in) {
+ %cast = fptoui <4 x double> %in to <4 x i64>
+ store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32
+ ret void
+}
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
index a13018bdfecf..5970adf999c9 100644
--- a/test/CodeGen/R600/fp_to_uint.ll
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -1,38 +1,47 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-; FUNC-LABEL: @fp_to_uint_v2i32
+; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32:
+; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+
+; SI: v_cvt_u32_f32_e32
+; SI: s_endpgm
+define void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float %in) {
+ %conv = fptoui float %in to i32
+ store i32 %conv, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_uint_v2f32_to_v2i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
-define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+define void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
%result = fptoui <2 x float> %in to <2 x i32>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @fp_to_uint_v4i32
+; FUNC-LABEL: {{^}}fp_to_uint_v4f32_to_v4i32:
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
-define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in
%result = fptoui <4 x float> %value to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-; FUNC: @fp_to_uint_i64
+; FUNC: {{^}}fp_to_uint_f32_to_i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -52,17 +61,17 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
; EG-DAG: XOR_INT
; EG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
-define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
+; SI: s_endpgm
+define void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) {
%conv = fptoui float %x to i64
store i64 %conv, i64 addrspace(1)* %out
ret void
}
-; FUNC: @fp_to_uint_v2i64
+; FUNC: {{^}}fp_to_uint_v2f32_to_v2i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -82,8 +91,8 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -103,17 +112,17 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
-define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
+; SI: s_endpgm
+define void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
%conv = fptoui <2 x float> %x to <2 x i64>
store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
ret void
}
-; FUNC: @fp_to_uint_v4i64
+; FUNC: {{^}}fp_to_uint_v4f32_to_v4i64:
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -133,8 +142,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -154,8 +163,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -175,8 +184,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
; EG-DAG: AND_INT
; EG-DAG: LSHR
; EG-DAG: SUB_INT
@@ -196,11 +205,11 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; EG-DAG: XOR_INT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
-; SI: S_ENDPGM
-define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
+; SI: s_endpgm
+define void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
%conv = fptoui <4 x float> %x to <4 x i64>
store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll
index 143ee79fa151..320545edf56b 100644
--- a/test/CodeGen/R600/fpext.ll
+++ b/test/CodeGen/R600/fpext.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
-; CHECK: @fpext
-; CHECK: V_CVT_F64_F32_e32
+; CHECK: {{^}}fpext:
+; CHECK: v_cvt_f64_f32_e32
define void @fpext(double addrspace(1)* %out, float %in) {
%result = fpext float %in to double
store double %result, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll
index 20a8c00ba498..15ae4e18ff38 100644
--- a/test/CodeGen/R600/fptrunc.ll
+++ b/test/CodeGen/R600/fptrunc.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
-; CHECK: @fptrunc
-; CHECK: V_CVT_F32_F64_e32
+; CHECK: {{^}}fptrunc:
+; CHECK: v_cvt_f32_f64_e32
define void @fptrunc(float addrspace(1)* %out, double %in) {
%result = fptrunc double %in to float
store float %result, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll
new file mode 100644
index 000000000000..50d6687abeec
--- /dev/null
+++ b/test/CodeGen/R600/frem.ll
@@ -0,0 +1,103 @@
+; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}frem_f32:
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; SI-DAG: v_cmp
+; SI-DAG: v_mul_f32
+; SI: v_rcp_f32_e32
+; SI: v_mul_f32_e32
+; SI: v_mul_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_mad_f32
+; SI: s_endpgm
+define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+ float addrspace(1)* %in2) #0 {
+ %gep2 = getelementptr float addrspace(1)* %in2, i32 4
+ %r0 = load float addrspace(1)* %in1, align 4
+ %r1 = load float addrspace(1)* %gep2, align 4
+ %r2 = frem float %r0, %r1
+ store float %r2, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_frem_f32:
+; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
+; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
+; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
+; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+ float addrspace(1)* %in2) #1 {
+ %gep2 = getelementptr float addrspace(1)* %in2, i32 4
+ %r0 = load float addrspace(1)* %in1, align 4
+ %r1 = load float addrspace(1)* %gep2, align 4
+ %r2 = frem float %r0, %r1
+ store float %r2, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; TODO: This should check something when f64 fdiv is implemented
+; correctly
+
+; FUNC-LABEL: {{^}}frem_f64:
+; SI: s_endpgm
+define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) #0 {
+ %r0 = load double addrspace(1)* %in1, align 8
+ %r1 = load double addrspace(1)* %in2, align 8
+ %r2 = frem double %r0, %r1
+ store double %r2, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_frem_f64:
+; SI: v_rcp_f64_e32
+; SI: v_mul_f64
+; SI: v_bfe_u32
+; SI: v_fma_f64
+; SI: s_endpgm
+define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) #1 {
+ %r0 = load double addrspace(1)* %in1, align 8
+ %r1 = load double addrspace(1)* %in2, align 8
+ %r2 = frem double %r0, %r1
+ store double %r2, double addrspace(1)* %out, align 8
+ ret void
+}
+
+define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
+ <2 x float> addrspace(1)* %in2) #0 {
+ %gep2 = getelementptr <2 x float> addrspace(1)* %in2, i32 4
+ %r0 = load <2 x float> addrspace(1)* %in1, align 8
+ %r1 = load <2 x float> addrspace(1)* %gep2, align 8
+ %r2 = frem <2 x float> %r0, %r1
+ store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8
+ ret void
+}
+
+define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
+ <4 x float> addrspace(1)* %in2) #0 {
+ %gep2 = getelementptr <4 x float> addrspace(1)* %in2, i32 4
+ %r0 = load <4 x float> addrspace(1)* %in1, align 16
+ %r1 = load <4 x float> addrspace(1)* %gep2, align 16
+ %r2 = frem <4 x float> %r0, %r1
+ store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+ <2 x double> addrspace(1)* %in2) #0 {
+ %gep2 = getelementptr <2 x double> addrspace(1)* %in2, i32 4
+ %r0 = load <2 x double> addrspace(1)* %in1, align 16
+ %r1 = load <2 x double> addrspace(1)* %gep2, align 16
+ %r2 = frem <2 x double> %r0, %r1
+ store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll
index ae50b17d38fc..0d1095cf2683 100644
--- a/test/CodeGen/R600/fsqrt.ll
+++ b/test/CodeGen/R600/fsqrt.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s
-; CHECK: @fsqrt_f32
-; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}}
+; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x)
+
+; CHECK: {{^}}fsqrt_f32:
+; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%r0 = load float addrspace(1)* %in
@@ -10,8 +13,8 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
ret void
}
-; CHECK: @fsqrt_f64
-; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; CHECK: {{^}}fsqrt_f64:
+; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r0 = load double addrspace(1)* %in
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index 4f74efba4d8b..4fe47e7badf3 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -1,14 +1,25 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-
-; R600-CHECK: @fsub_f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
-; SI-CHECK: @fsub_f32
-; SI-CHECK: V_SUB_F32
-define void @fsub_f32(float addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fsub float %a, %b
- store float %0, float addrspace(1)* %out
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}v_fsub_f32:
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %b_ptr = getelementptr float addrspace(1)* %in, i32 1
+ %a = load float addrspace(1)* %in, align 4
+ %b = load float addrspace(1)* %b_ptr, align 4
+ %result = fsub float %a, %b
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_fsub_f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
+
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
+ %sub = fsub float %a, %b
+ store float %sub, float addrspace(1)* %out, align 4
ret void
}
@@ -16,34 +27,48 @@ declare float @llvm.R600.load.input(i32) readnone
declare void @llvm.AMDGPU.store.output(float, i32)
-; R600-CHECK: @fsub_v2f32
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
-; SI-CHECK: @fsub_v2f32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
+; FUNC-LABEL: {{^}}fsub_v2f32:
+; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
+; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
+
+; FIXME: Should be using SGPR directly for first operand
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
-entry:
- %0 = fsub <2 x float> %a, %b
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+ %sub = fsub <2 x float> %a, %b
+ store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
ret void
}
-; R600-CHECK: @fsub_v4f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; SI-CHECK: @fsub_v4f32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}v_fsub_v4f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1) * %in
- %b = load <4 x float> addrspace(1) * %b_ptr
+ %a = load <4 x float> addrspace(1)* %in, align 16
+ %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+ %result = fsub <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FIXME: Should be using SGPR directly for first operand
+
+; FUNC-LABEL: {{^}}s_fsub_v4f32:
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: s_endpgm
+define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
%result = fsub <4 x float> %a, %b
- store <4 x float> %result, <4 x float> addrspace(1)* %out
+ store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
ret void
}
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
index f5e5708f1b41..d0f894607a61 100644
--- a/test/CodeGen/R600/fsub64.ll
+++ b/test/CodeGen/R600/fsub64.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @fsub_f64:
-; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI-LABEL: {{^}}fsub_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll
new file mode 100644
index 000000000000..2c7217ef0561
--- /dev/null
+++ b/test/CodeGen/R600/ftrunc.f64.ll
@@ -0,0 +1,110 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.trunc.f64(double) nounwind readnone
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
+declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
+declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
+
+; FUNC-LABEL: {{^}}v_ftrunc_f64:
+; CI: v_trunc_f64
+; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
+; SI: s_endpgm
+define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+ %x = load double addrspace(1)* %in, align 8
+ %y = call double @llvm.trunc.f64(double %x) nounwind readnone
+ store double %y, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_f64:
+; CI: v_trunc_f64_e32
+
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: s_endpgm
+define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
+ %y = call double @llvm.trunc.f64(double %x) nounwind readnone
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_v2f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
+ %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
+ store <2 x double> %y, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f64:
+; FIXME-CI: v_trunc_f64_e32
+; FIXME-CI: v_trunc_f64_e32
+; FIXME-CI: v_trunc_f64_e32
+; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
+; store <3 x double> %y, <3 x double> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}ftrunc_v4f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
+ %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+ store <4 x double> %y, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_v8f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
+ %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
+ store <8 x double> %y, <8 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_v16f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
+ %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
+ store <16 x double> %y, <16 x double> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll
index 0d7d4679fe3d..39eb2b5accbf 100644
--- a/test/CodeGen/R600/ftrunc.ll
+++ b/test/CodeGen/R600/ftrunc.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
declare float @llvm.trunc.f32(float) nounwind readnone
declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
@@ -8,55 +8,55 @@ declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone
declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone
-; FUNC-LABEL: @ftrunc_f32:
+; FUNC-LABEL: {{^}}ftrunc_f32:
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
%y = call float @llvm.trunc.f32(float %x) nounwind readnone
store float %y, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @ftrunc_v2f32:
+; FUNC-LABEL: {{^}}ftrunc_v2f32:
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
%y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone
store <2 x float> %y, <2 x float> addrspace(1)* %out
ret void
}
-; FIXME-FUNC-LABEL: @ftrunc_v3f32:
+; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f32:
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
; FIXME-EG: TRUNC
-; FIXME-SI: V_TRUNC_F32_e32
-; FIXME-SI: V_TRUNC_F32_e32
-; FIXME-SI: V_TRUNC_F32_e32
+; FIXME-SI: v_trunc_f32_e32
+; FIXME-SI: v_trunc_f32_e32
+; FIXME-SI: v_trunc_f32_e32
; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
; %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone
; store <3 x float> %y, <3 x float> addrspace(1)* %out
; ret void
; }
-; FUNC-LABEL: @ftrunc_v4f32:
+; FUNC-LABEL: {{^}}ftrunc_v4f32:
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
%y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
store <4 x float> %y, <4 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @ftrunc_v8f32:
+; FUNC-LABEL: {{^}}ftrunc_v8f32:
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
@@ -65,21 +65,21 @@ define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
%y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone
store <8 x float> %y, <8 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @ftrunc_v16f32:
+; FUNC-LABEL: {{^}}ftrunc_v16f32:
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
@@ -96,22 +96,22 @@ define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
; EG: TRUNC
; EG: TRUNC
; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
%y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone
store <16 x float> %y, <16 x float> addrspace(1)* %out
diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll
index ab2c0bf92fe3..2d1892534dc5 100644
--- a/test/CodeGen/R600/gep-address-space.ll
+++ b/test/CodeGen/R600/gep-address-space.ll
@@ -1,29 +1,33 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
-; CHECK-LABEL: @use_gep_address_space:
-; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40
+; CHECK-LABEL: {{^}}use_gep_address_space:
+; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
}
define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
-; CHECK-LABEL: @use_gep_address_space_large_offset:
-; CHECK: S_ADD_I32
-; CHECK: DS_WRITE_B32
+; CHECK-LABEL: {{^}}use_gep_address_space_large_offset:
+; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
+; SI, which is why it is being OR'd with the base pointer.
+; SI: s_or_b32
+; CI: s_add_i32
+; CHECK: ds_write_b32
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
}
define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
-; CHECK-LABEL: @gep_as_vector_v4:
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
+; CHECK-LABEL: {{^}}gep_as_vector_v4:
+; CHECK: s_add_i32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
%p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
%p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
@@ -37,9 +41,9 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind
}
define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
-; CHECK-LABEL: @gep_as_vector_v2:
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
+; CHECK-LABEL: {{^}}gep_as_vector_v2:
+; CHECK: s_add_i32
+; CHECK: s_add_i32
%p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
%p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1
diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/R600/global-directive.ll
new file mode 100644
index 000000000000..189510ad08fe
--- /dev/null
+++ b/test/CodeGen/R600/global-directive.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; Make sure the GlobalDirective isn't merged with the function name
+
+; SI: .globl foo
+; SI: {{^}}foo:
+define void @foo(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %result = add i32 %a, %b
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/R600/global-extload-i1.ll
new file mode 100644
index 000000000000..940911e73453
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i1.ll
@@ -0,0 +1,301 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: Evergreen broken
+
+; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = zext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = sext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
+; SI: s_endpgm
+define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = zext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
+; SI: s_endpgm
+define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = sext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = zext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = sext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = zext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = sext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = zext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = sext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = zext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = sext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
+; XSI: s_endpgm
+; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = zext <32 x i1> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
+; XSI: s_endpgm
+; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = sext <32 x i1> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
+; XSI: s_endpgm
+; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = zext <64 x i1> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
+; XSI: s_endpgm
+; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = sext <64 x i1> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; SI: buffer_store_dwordx2
+define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = zext i1 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = sext i1 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = zext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = sext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = zext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = sext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = zext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = sext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = zext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = sext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = zext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = sext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
+; XSI: s_endpgm
+; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = zext <32 x i1> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
+; XSI: s_endpgm
+; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = sext <32 x i1> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
+; XSI: s_endpgm
+; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = zext <64 x i1> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
+; XSI: s_endpgm
+; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = sext <64 x i1> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll
new file mode 100644
index 000000000000..838068470ff2
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i16.ll
@@ -0,0 +1,301 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
+
+; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
+; SI: buffer_load_ushort
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = zext i16 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
+; SI: buffer_load_sshort
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = sext i16 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
+; SI: buffer_load_ushort
+; SI: s_endpgm
+define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = zext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
+; SI: buffer_load_sshort
+; SI: s_endpgm
+define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = sext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = zext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = sext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = zext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = sext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = zext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = sext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = zext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = sext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
+; SI: s_endpgm
+define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = zext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
+; SI: s_endpgm
+define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = sext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
+; SI: s_endpgm
+define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = zext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
+; SI: s_endpgm
+define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = sext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
+; SI: buffer_load_ushort v[[LO:[0-9]+]],
+; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = zext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
+; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = sext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = zext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = sext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = zext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = sext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = zext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = sext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = zext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = sext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = zext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = sext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
+; SI: s_endpgm
+define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = zext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
+; SI: s_endpgm
+define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = sext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
+; SI: s_endpgm
+define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = zext <64 x i16> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
+; SI: s_endpgm
+define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = sext <64 x i16> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll
new file mode 100644
index 000000000000..ce78c446c3ba
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i32.ll
@@ -0,0 +1,456 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
+; SI: buffer_load_dword v[[LO:[0-9]+]],
+; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %a = load i32 addrspace(1)* %in
+ %ext = zext i32 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %a = load i32 addrspace(1)* %in
+ %ext = sext i32 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
+; SI: buffer_load_dword
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i32> addrspace(1)* %in
+ %ext = zext <1 x i32> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
+; SI: buffer_load_dword
+; SI: v_ashrrev_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i32> addrspace(1)* %in
+ %ext = sext <1 x i32> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
+; SI: buffer_load_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i32> addrspace(1)* %in
+ %ext = zext <2 x i32> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
+; SI: buffer_load_dwordx2
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i32> addrspace(1)* %in
+ %ext = sext <2 x i32> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i32> addrspace(1)* %in
+ %ext = zext <4 x i32> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
+; SI: buffer_load_dwordx4
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i32> addrspace(1)* %in
+ %ext = sext <4 x i32> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i32> addrspace(1)* %in
+ %ext = zext <8 x i32> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i32> addrspace(1)* %in
+ %ext = sext <8 x i32> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i32> addrspace(1)* %in
+ %ext = sext <16 x i32> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i32> addrspace(1)* %in
+ %ext = zext <16 x i32> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i32> addrspace(1)* %in
+ %ext = sext <32 x i32> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i32> addrspace(1)* %in
+ %ext = zext <32 x i32> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll
new file mode 100644
index 000000000000..8d6042f1de02
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i8.ll
@@ -0,0 +1,298 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = zext i8 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
+; SI: buffer_load_sbyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = sext i8 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
+; SI: s_endpgm
+define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = zext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
+; SI: s_endpgm
+define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = sext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = zext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = sext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = zext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = sext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = zext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = sext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = zext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = sext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
+; XSI: s_endpgm
+; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = zext <32 x i8> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
+; XSI: s_endpgm
+; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = sext <32 x i8> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
+; XSI: s_endpgm
+; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = zext <64 x i8> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
+; XSI: s_endpgm
+; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = sext <64 x i8> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
+; SI: buffer_load_ubyte v[[LO:[0-9]+]],
+; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = zext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
+; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = sext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = zext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = sext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = zext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = sext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = zext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = sext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = zext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = sext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = zext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = sext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
+; XSI: s_endpgm
+; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = zext <32 x i8> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
+; XSI: s_endpgm
+; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = sext <32 x i8> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
+; XSI: s_endpgm
+; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = zext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
+; XSI: s_endpgm
+; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = sext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/R600/global-zero-initializer.ll
new file mode 100644
index 000000000000..031df59cd1e1
--- /dev/null
+++ b/test/CodeGen/R600/global-zero-initializer.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported initializer for address space in load_init_global_global
+
+@lds = addrspace(1) global [256 x i32] zeroinitializer
+
+define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
+ %gep = getelementptr [256 x i32] addrspace(1)* @lds, i32 0, i32 10
+ %ld = load i32 addrspace(1)* %gep
+ store i32 %ld, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
new file mode 100644
index 000000000000..5a07a028f44f
--- /dev/null
+++ b/test/CodeGen/R600/global_atomics.ll
@@ -0,0 +1,801 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}atomic_add_i32_offset:
+; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
+; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32:
+; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
+; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_offset:
+; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
+; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32:
+; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
+; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
+; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
+; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32:
+; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
+; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_offset:
+; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
+; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32:
+; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
+; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
+; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
+; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32:
+; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
+; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_offset:
+; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
+; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32:
+; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
+; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
+; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
+; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32:
+; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
+; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_offset:
+; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
+; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32:
+; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
+; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
+; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
+; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32:
+; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
+; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
+; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
+; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32:
+; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
+; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
+ store i32 %0, i32 addrspace(1)* %out2
+ ret void
+}
diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll
index ebd781107627..af0df413ca58 100644
--- a/test/CodeGen/R600/gv-const-addrspace-fail.ll
+++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -1,14 +1,13 @@
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1
-; FUNC-LABEL: @test_i8
+; FUNC-LABEL: {{^}}test_i8:
; EG: CF_END
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; SI: buffer_store_byte
+; SI: s_endpgm
define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
%arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
%1 = load i8 addrspace(2)* %arrayidx, align 1
@@ -18,10 +17,10 @@ define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
-; FUNC-LABEL: @test_i16
+; FUNC-LABEL: {{^}}test_i16:
; EG: CF_END
-; SI: BUFFER_STORE_SHORT
-; SI: S_ENDPGM
+; SI: buffer_store_short
+; SI: s_endpgm
define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
%arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
%1 = load i16 addrspace(2)* %arrayidx, align 2
@@ -32,9 +31,9 @@ define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
%struct.bar = type { float, [5 x i8] }
; The illegal i8s aren't handled
-@struct_bar_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ]
+@struct_bar_gv = internal addrspace(2) constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ]
-; FUNC-LABEL: @struct_bar_gv_load
+; FUNC-LABEL: {{^}}struct_bar_gv_load:
define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
%load = load i8 addrspace(2)* %gep, align 1
@@ -49,7 +48,7 @@ define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
<4 x i32> <i32 9, i32 10, i32 11, i32 12>,
<4 x i32> <i32 13, i32 14, i32 15, i32 16> ]
-; FUNC-LABEL: @array_vector_gv_load
+; FUNC-LABEL: {{^}}array_vector_gv_load:
define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
%load = load <4 x i32> addrspace(2)* %gep, align 16
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
index e0ac317f9986..c58e5846d98c 100644
--- a/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
-; FUNC-LABEL: @float
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FUNC-LABEL: {{^}}float:
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -27,10 +27,10 @@ entry:
@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
-; FUNC-LABEL: @i32
+; FUNC-LABEL: {{^}}i32:
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -52,8 +52,8 @@ entry:
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
-; FUNC-LABEL: @struct_foo_gv_load
-; SI: S_LOAD_DWORD
+; FUNC-LABEL: {{^}}struct_foo_gv_load:
+; SI: s_load_dword
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
@@ -67,9 +67,9 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
<1 x i32> <i32 3>,
<1 x i32> <i32 4> ]
-; FUNC-LABEL: @array_v1_gv_load
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FUNC-LABEL: {{^}}array_v1_gv_load:
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
%gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
%load = load <1 x i32> addrspace(2)* %gep, align 4
diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll
index 42aa4faa99f4..cb7a94a0b859 100644
--- a/test/CodeGen/R600/half.ll
+++ b/test/CodeGen/R600/half.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s
define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
-; CHECK-LABEL: @test_load_store
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK-LABEL: {{^}}test_load_store:
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
}
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
-; CHECK-LABEL: @test_bitcast_from_half
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK-LABEL: {{^}}test_bitcast_from_half:
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
%val = load half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
@@ -20,9 +20,9 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
}
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
-; CHECK-LABEL: @test_bitcast_to_half
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK-LABEL: {{^}}test_bitcast_to_half:
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
%val = load i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
@@ -30,8 +30,8 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
}
define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
-; CHECK-LABEL: @test_extend32
-; CHECK: V_CVT_F32_F16_e32
+; CHECK-LABEL: {{^}}test_extend32:
+; CHECK: v_cvt_f32_f16_e32
%val16 = load half addrspace(1)* %in
%val32 = fpext half %val16 to float
@@ -40,9 +40,9 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
}
define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
-; CHECK-LABEL: @test_extend64
-; CHECK: V_CVT_F32_F16_e32
-; CHECK: V_CVT_F64_F32_e32
+; CHECK-LABEL: {{^}}test_extend64:
+; CHECK: v_cvt_f32_f16_e32
+; CHECK: v_cvt_f64_f32_e32
%val16 = load half addrspace(1)* %in
%val64 = fpext half %val16 to double
@@ -51,8 +51,8 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
}
define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
-; CHECK-LABEL: @test_trunc32
-; CHECK: V_CVT_F16_F32_e32
+; CHECK-LABEL: {{^}}test_trunc32:
+; CHECK: v_cvt_f16_f32_e32
%val32 = load float addrspace(1)* %in
%val16 = fptrunc float %val32 to half
diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll
new file mode 100644
index 000000000000..2e79866362ac
--- /dev/null
+++ b/test/CodeGen/R600/hsa.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+
+; HSA: {{^}}simple:
+; Make sure we are setting the ATC bit:
+; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
+; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
+
+define void @simple(i32 addrspace(1)* %out) {
+entry:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/i1-copy-implicit-def.ll b/test/CodeGen/R600/i1-copy-implicit-def.ll
new file mode 100644
index 000000000000..51e230196bf6
--- /dev/null
+++ b/test/CodeGen/R600/i1-copy-implicit-def.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SILowerI1Copies was not handling IMPLICIT_DEF
+; SI-LABEL: {{^}}br_implicit_def:
+; SI: BB#0:
+; SI-NEXT: s_and_saveexec_b64
+; SI-NEXT: s_xor_b64
+; SI-NEXT: BB#1:
+define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 {
+bb:
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ store volatile i32 123, i32 addrspace(1)* %out
+ ret void
+
+bb2:
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/R600/i1-copy-phi.ll
new file mode 100644
index 000000000000..8b761710f9d2
--- /dev/null
+++ b/test/CodeGen/R600/i1-copy-phi.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}br_i1_phi:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; SI: s_and_saveexec_b64
+; SI: s_xor_b64
+; SI: v_mov_b32_e32 [[REG]], -1{{$}}
+; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[REG]], 0
+; SI: s_and_saveexec_b64
+; SI: s_xor_b64
+; SI: s_endpgm
+define void @br_i1_phi(i32 %arg, i1 %arg1) #0 {
+bb:
+ br i1 %arg1, label %bb2, label %bb3
+
+bb2: ; preds = %bb
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb
+ %tmp = phi i1 [ true, %bb2 ], [ false, %bb ]
+ br i1 %tmp, label %bb4, label %bb6
+
+bb4: ; preds = %bb3
+ %tmp5 = mul i32 undef, %arg
+ br label %bb6
+
+bb6: ; preds = %bb4, %bb3
+ ret void
+}
diff --git a/test/CodeGen/R600/icmp64.ll b/test/CodeGen/R600/icmp64.ll
index c9e62ff934ee..ed0f221b87b1 100644
--- a/test/CodeGen/R600/icmp64.ll
+++ b/test/CodeGen/R600/icmp64.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @test_i64_eq:
-; SI: V_CMP_EQ_I64
+; SI-LABEL: {{^}}test_i64_eq:
+; SI: v_cmp_eq_i64
define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp eq i64 %a, %b
%result = sext i1 %cmp to i32
@@ -9,8 +9,8 @@ define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_ne:
-; SI: V_CMP_NE_I64
+; SI-LABEL: {{^}}test_i64_ne:
+; SI: v_cmp_ne_i64
define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ne i64 %a, %b
%result = sext i1 %cmp to i32
@@ -18,8 +18,8 @@ define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_slt:
-; SI: V_CMP_LT_I64
+; SI-LABEL: {{^}}test_i64_slt:
+; SI: v_cmp_lt_i64
define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp slt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -27,8 +27,8 @@ define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_ult:
-; SI: V_CMP_LT_U64
+; SI-LABEL: {{^}}test_i64_ult:
+; SI: v_cmp_lt_u64
define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ult i64 %a, %b
%result = sext i1 %cmp to i32
@@ -36,8 +36,8 @@ define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_sle:
-; SI: V_CMP_LE_I64
+; SI-LABEL: {{^}}test_i64_sle:
+; SI: v_cmp_le_i64
define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sle i64 %a, %b
%result = sext i1 %cmp to i32
@@ -45,8 +45,8 @@ define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_ule:
-; SI: V_CMP_LE_U64
+; SI-LABEL: {{^}}test_i64_ule:
+; SI: v_cmp_le_u64
define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ule i64 %a, %b
%result = sext i1 %cmp to i32
@@ -54,8 +54,8 @@ define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_sgt:
-; SI: V_CMP_GT_I64
+; SI-LABEL: {{^}}test_i64_sgt:
+; SI: v_cmp_gt_i64
define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sgt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -63,8 +63,8 @@ define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_ugt:
-; SI: V_CMP_GT_U64
+; SI-LABEL: {{^}}test_i64_ugt:
+; SI: v_cmp_gt_u64
define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp ugt i64 %a, %b
%result = sext i1 %cmp to i32
@@ -72,8 +72,8 @@ define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_sge:
-; SI: V_CMP_GE_I64
+; SI-LABEL: {{^}}test_i64_sge:
+; SI: v_cmp_ge_i64
define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp sge i64 %a, %b
%result = sext i1 %cmp to i32
@@ -81,8 +81,8 @@ define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; SI-LABEL: @test_i64_uge:
-; SI: V_CMP_GE_U64
+; SI-LABEL: {{^}}test_i64_uge:
+; SI: v_cmp_ge_u64
define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%cmp = icmp uge i64 %a, %b
%result = sext i1 %cmp to i32
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
index b047315be71a..6e4fa3cc60cb 100644
--- a/test/CodeGen/R600/imm.ll
+++ b/test/CodeGen/R600/imm.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
; Use a 64-bit value with lo bits that can be represented as an inline constant
-; CHECK: @i64_imm_inline_lo
-; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
-; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]:
+; CHECK-LABEL: {{^}}i64_imm_inline_lo:
+; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5
+; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
entry:
store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
@@ -12,12 +12,473 @@ entry:
}
; Use a 64-bit value with hi bits that can be represented as an inline constant
-; CHECK: @i64_imm_inline_hi
-; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
-; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]]
+; CHECK-LABEL: {{^}}i64_imm_inline_hi:
+; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5
+; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
+; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
entry:
store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
ret void
}
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
+ store float 0.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
+ store float -0.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
+ store float 0.5, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
+ store float -0.5, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
+ store float 1.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
+ store float -1.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
+ store float 2.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
+ store float -2.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
+ store float 4.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
+ store float -4.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_literal_imm_f32:
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_literal_imm_f32(float addrspace(1)* %out) {
+ store float 4096.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0.5
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, -0.5
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 1.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, -1.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 2.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, -2.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 4.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, -4.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @commute_add_inline_imm_0.5_f32
+; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
+; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %x = load float addrspace(1)* %in
+ %y = fadd float %x, 0.5
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @commute_add_literal_f32
+; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
+; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %x = load float addrspace(1)* %in
+ %y = fadd float %x, 1024.0
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_1_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0x36a0000000000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0x36b0000000000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_16_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0x36e0000000000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0xffffffffe0000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0xffffffffc0000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0xfffffffe00000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_63_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0x36ff800000000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_64_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
+ %y = fadd float %x, 0x3700000000000000
+ store float %y, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0.0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0.5
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, -0.5
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 1.0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, -1.0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 2.0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, -2.0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 4.0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, -4.0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+
+; CHECK-LABEL: {{^}}add_inline_imm_1_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0x0000000000000001
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0x0000000000000002
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_16_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0x0000000000000010
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0xffffffffffffffff
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0xfffffffffffffffe
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0xfffffffffffffff0
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_63_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0x000000000000003F
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_64_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0x0000000000000040
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64
+; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
+ store double 0.0, double addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
index 169d69b7c25c..db597a363775 100644
--- a/test/CodeGen/R600/indirect-addressing-si.ll
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
; Tests for indirect addressing on SI, which is implemented using dynamic
; indexing of vectors.
; CHECK: extract_w_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELS_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = add i32 %in, 1
@@ -15,8 +15,8 @@ entry:
}
; CHECK: extract_wo_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELS_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
@@ -25,8 +25,8 @@ entry:
}
; CHECK: insert_w_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELD_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movreld_b32_e32
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = add i32 %in, 1
@@ -37,8 +37,8 @@ entry:
}
; CHECK: insert_wo_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELD_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movreld_b32_e32
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
%0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll
index 5747434935b3..24006f8799b2 100644
--- a/test/CodeGen/R600/indirect-private-64.ll
+++ b/test/CodeGen/R600/indirect-private-64.ll
@@ -1,16 +1,16 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-; SI-LABEL: @private_access_f64_alloca:
+; SI-LABEL: {{^}}private_access_f64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: buffer_store_dwordx2
+; SI-ALLOCA: buffer_load_dwordx2
-; SI-PROMOTE: DS_WRITE_B64
-; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load double addrspace(1)* %in, align 8
%array = alloca double, i32 16, align 8
@@ -22,19 +22,19 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
ret void
}
-; SI-LABEL: @private_access_v2f64_alloca:
+; SI-LABEL: {{^}}private_access_v2f64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: buffer_store_dwordx4
+; SI-ALLOCA: buffer_load_dwordx4
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x double> addrspace(1)* %in, align 16
%array = alloca <2 x double>, i32 16, align 16
@@ -46,13 +46,13 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
ret void
}
-; SI-LABEL: @private_access_i64_alloca:
+; SI-LABEL: {{^}}private_access_i64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: buffer_store_dwordx2
+; SI-ALLOCA: buffer_load_dwordx2
-; SI-PROMOTE: DS_WRITE_B64
-; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%array = alloca i64, i32 16, align 8
@@ -64,19 +64,19 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
ret void
}
-; SI-LABEL: @private_access_v2i64_alloca:
+; SI-LABEL: {{^}}private_access_v2i64_alloca:
-; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: buffer_store_dwordx4
+; SI-ALLOCA: buffer_load_dwordx4
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%array = alloca <2 x i64>, i32 16, align 16
diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll
index 68ffaae1c428..0f82a7df6098 100644
--- a/test/CodeGen/R600/infinite-loop.ll
+++ b/test/CodeGen/R600/infinite-loop.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @infinite_loop:
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
+; SI-LABEL: {{^}}infinite_loop:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: BB0_1:
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_WAITCNT vmcnt(0) expcnt(0)
-; SI: S_BRANCH BB0_1
+; SI: buffer_store_dword [[REG]]
+; SI: s_waitcnt vmcnt(0) expcnt(0)
+; SI: s_branch BB0_1
define void @infinite_loop(i32 addrspace(1)* %out) {
entry:
br label %for.body
diff --git a/test/CodeGen/R600/inline-asm.ll b/test/CodeGen/R600/inline-asm.ll
new file mode 100644
index 000000000000..6f1f977de2a4
--- /dev/null
+++ b/test/CodeGen/R600/inline-asm.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK: {{^}}inline_asm:
+; CHECK: s_endpgm
+; CHECK: s_endpgm
+define void @inline_asm(i32 addrspace(1)* %out) {
+entry:
+ store i32 5, i32 addrspace(1)* %out
+ call void asm sideeffect "s_endpgm", ""()
+ ret void
+}
diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll
new file mode 100644
index 000000000000..b8700d55e155
--- /dev/null
+++ b/test/CodeGen/R600/inline-calls.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-NOT: {{^}}func:
+define internal fastcc i32 @func(i32 %a) {
+entry:
+ %tmp0 = add i32 %a, 1
+ ret i32 %tmp0
+}
+
+; CHECK: {{^}}kernel:
+define void @kernel(i32 addrspace(1)* %out) {
+entry:
+ %tmp0 = call i32 @func(i32 1)
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: {{^}}kernel2:
+define void @kernel2(i32 addrspace(1)* %out) {
+entry:
+ call void @kernel(i32 addrspace(1)* %out)
+ ret void
+}
diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll
index 13bfbab85695..e3e94995fc95 100644
--- a/test/CodeGen/R600/input-mods.ll
+++ b/test/CodeGen/R600/input-mods.ll
@@ -1,9 +1,9 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
-;EG-CHECK-LABEL: @test
+;EG-CHECK-LABEL: {{^}}test:
;EG-CHECK: EXP_IEEE *
-;CM-CHECK-LABEL: @test
+;CM-CHECK-LABEL: {{^}}test:
;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
diff --git a/test/CodeGen/R600/insert_subreg.ll b/test/CodeGen/R600/insert_subreg.ll
new file mode 100644
index 000000000000..dfb58d54796c
--- /dev/null
+++ b/test/CodeGen/R600/insert_subreg.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s
+
+; Test that INSERT_SUBREG instructions don't have non-register operands after
+; instruction selection.
+
+; Make sure this doesn't crash
+; CHECK-LABEL: test:
+define void @test(i64 addrspace(1)* %out) {
+entry:
+ %tmp0 = alloca [16 x i32]
+ %tmp1 = ptrtoint [16 x i32]* %tmp0 to i32
+ %tmp2 = sext i32 %tmp1 to i64
+ store i64 %tmp2, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 43b4efc93377..2442c868444f 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
@@ -8,116 +8,116 @@
; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?
-; SI-LABEL: @insertelement_v4f32_0:
-; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
-; V_MOV_B32_e32
-; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
-; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
-; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
+; SI-LABEL: {{^}}insertelement_v4f32_0:
+; s_load_dwordx4 s{{[}}[[LOW_REG:[0-9]+]]:
+; v_mov_b32_e32
+; v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
+; v_mov_b32_e32 v[[LOW_REG]], [[CONSTREG]]
+; buffer_store_dwordx4 v{{[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @insertelement_v4f32_1:
+; SI-LABEL: {{^}}insertelement_v4f32_1:
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @insertelement_v4f32_2:
+; SI-LABEL: {{^}}insertelement_v4f32_2:
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @insertelement_v4f32_3:
+; SI-LABEL: {{^}}insertelement_v4f32_3:
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @insertelement_v4i32_0:
+; SI-LABEL: {{^}}insertelement_v4i32_0:
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
%vecins = insertelement <4 x i32> %a, i32 999, i32 0
store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @dynamic_insertelement_v2f32:
-; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
-; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
-; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
+; SI-LABEL: {{^}}dynamic_insertelement_v2f32:
+; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
+; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
%vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @dynamic_insertelement_v4f32:
-; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
-; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
-; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
+; SI-LABEL: {{^}}dynamic_insertelement_v4f32:
+; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
+; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @dynamic_insertelement_v8f32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
ret void
}
-; SI-LABEL: @dynamic_insertelement_v16f32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
%vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
ret void
}
-; SI-LABEL: @dynamic_insertelement_v2i32:
-; SI: BUFFER_STORE_DWORDX2
+; SI-LABEL: {{^}}dynamic_insertelement_v2i32:
+; SI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i32> %a, i32 5, i32 %b
store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @dynamic_insertelement_v4i32:
-; SI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v4i32:
+; SI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i32> %a, i32 5, i32 %b
store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @dynamic_insertelement_v8i32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v8i32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i32> %a, i32 5, i32 %b
store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
ret void
}
-; SI-LABEL: @dynamic_insertelement_v16i32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v16i32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i32> %a, i32 5, i32 %b
store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
@@ -125,16 +125,16 @@ define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i
}
-; SI-LABEL: @dynamic_insertelement_v2i16:
-; FIXMESI: BUFFER_STORE_DWORDX2
+; SI-LABEL: {{^}}dynamic_insertelement_v2i16:
+; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i16> %a, i16 5, i32 %b
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @dynamic_insertelement_v4i16:
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v4i16:
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i16> %a, i16 5, i32 %b
store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
@@ -142,7 +142,7 @@ define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
}
-; SI-LABEL: @dynamic_insertelement_v2i8:
+; SI-LABEL: {{^}}dynamic_insertelement_v2i8:
; FIXMESI: BUFFER_STORE_USHORT
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i8> %a, i8 5, i32 %b
@@ -150,24 +150,24 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
ret void
}
-; SI-LABEL: @dynamic_insertelement_v4i8:
-; FIXMESI: BUFFER_STORE_DWORD
+; SI-LABEL: {{^}}dynamic_insertelement_v4i8:
+; FIXMESI: buffer_store_dword
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i8> %a, i8 5, i32 %b
store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @dynamic_insertelement_v8i8:
-; FIXMESI: BUFFER_STORE_DWORDX2
+; SI-LABEL: {{^}}dynamic_insertelement_v8i8:
+; FIXMESI: buffer_store_dwordx2
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i8> %a, i8 5, i32 %b
store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
ret void
}
-; SI-LABEL: @dynamic_insertelement_v16i8:
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v16i8:
+; FIXMESI: buffer_store_dwordx4
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i8> %a, i8 5, i32 %b
store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
@@ -176,7 +176,7 @@ define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8>
; This test requires handling INSERT_SUBREG in SIFixSGPRCopies. Check that
; the compiler doesn't crash.
-; SI-LABEL: @insert_split_bb
+; SI-LABEL: {{^}}insert_split_bb:
define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
entry:
%0 = insertelement <2 x i32> undef, i32 %a, i32 0
@@ -199,3 +199,53 @@ endif:
store <2 x i32> %7, <2 x i32> addrspace(1)* %out
ret void
}
+
+; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
+ %vecins = insertelement <2 x double> %a, double 8.0, i32 %b
+ store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
+ %vecins = insertelement <2 x i64> %a, i64 5, i32 %b
+ store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
+ %vecins = insertelement <4 x double> %a, double 8.0, i32 %b
+ store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
+ %vecins = insertelement <8 x double> %a, double 8.0, i32 %b
+ store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
+ ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt_f64.ll b/test/CodeGen/R600/insert_vector_elt_f64.ll
deleted file mode 100644
index 595bc59655ac..000000000000
--- a/test/CodeGen/R600/insert_vector_elt_f64.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-
-
-; SI-LABEL: @dynamic_insertelement_v2f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
- %vecins = insertelement <2 x double> %a, double 8.0, i32 %b
- store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; SI-LABEL: @dynamic_insertelement_v2f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
- %vecins = insertelement <2 x i64> %a, i64 5, i32 %b
- store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
- ret void
-}
-
-; SI-LABEL: @dynamic_insertelement_v4f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
- %vecins = insertelement <4 x double> %a, double 8.0, i32 %b
- store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
- ret void
-}
-
-; SI-LABEL: @dynamic_insertelement_v8f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
- %vecins = insertelement <8 x double> %a, double 8.0, i32 %b
- store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
- ret void
-}
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 0baa3cd3e1a3..27840b2e1609 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: @main1
+; CHECK: {{^}}main1:
; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
define void @main1() {
main_body:
@@ -48,7 +48,7 @@ main_body:
ret void
}
-; CHECK: @main2
+; CHECK: {{^}}main2:
; CHECK-NOT: MOV
define void @main2() {
main_body:
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
index 6fc69792fd3d..1984d333d4d5 100644
--- a/test/CodeGen/R600/kernel-args.ll
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; EG-CHECK-LABEL: @i8_arg
+; EG-CHECK-LABEL: {{^}}i8_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}i8_arg:
+; SI-CHECK: buffer_load_ubyte
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@@ -14,10 +14,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i8_zext_arg
+; EG-CHECK-LABEL: {{^}}i8_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i8_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i8_zext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@@ -26,10 +26,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i8_sext_arg
+; EG-CHECK-LABEL: {{^}}i8_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i8_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i8_sext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@@ -38,10 +38,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i16_arg
+; EG-CHECK-LABEL: {{^}}i16_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}i16_arg:
+; SI-CHECK: buffer_load_ushort
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@@ -50,10 +50,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i16_zext_arg
+; EG-CHECK-LABEL: {{^}}i16_zext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i16_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i16_zext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@@ -62,10 +62,10 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i16_sext_arg
+; EG-CHECK-LABEL: {{^}}i16_sext_arg:
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i16_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i16_sext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@@ -74,176 +74,176 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i32_arg
+; EG-CHECK-LABEL: {{^}}i32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i32_arg:
+; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @f32_arg
+; EG-CHECK-LABEL: {{^}}f32_arg:
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @f32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}f32_arg:
+; s_load_dword s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v2i8_arg
+; EG-CHECK-LABEL: {{^}}v2i8_arg:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v2i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v2i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v2i16_arg
+; EG-CHECK-LABEL: {{^}}v2i16_arg:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v2i16_arg
-; SI-CHECK-DAG: BUFFER_LOAD_USHORT
-; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v2i16_arg:
+; SI-CHECK-DAG: buffer_load_ushort
+; SI-CHECK-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v2i32_arg
+; EG-CHECK-LABEL: {{^}}v2i32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-CHECK-LABEL: @v2i32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}v2i32_arg:
+; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v2f32_arg
+; EG-CHECK-LABEL: {{^}}v2f32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-CHECK-LABEL: @v2f32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}v2f32_arg:
+; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v3i8_arg
+; EG-CHECK-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; SI-CHECK-LABEL: @v3i8_arg
+; SI-CHECK-LABEL: {{^}}v3i8_arg:
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v3i16_arg
+; EG-CHECK-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; SI-CHECK-LABEL: @v3i16_arg
+; SI-CHECK-LABEL: {{^}}v3i16_arg:
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v3i32_arg
+; EG-CHECK-LABEL: {{^}}v3i32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-CHECK-LABEL: @v3i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v3i32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v3f32_arg
+; EG-CHECK-LABEL: {{^}}v3f32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-CHECK-LABEL: @v3f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v3f32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v4i8_arg
+; EG-CHECK-LABEL: {{^}}v4i8_arg:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v4i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v4i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v4i16_arg
+; EG-CHECK-LABEL: {{^}}v4i16_arg:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v4i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v4i16_arg:
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v4i32_arg
+; EG-CHECK-LABEL: {{^}}v4i32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-CHECK-LABEL: @v4i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v4i32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v4f32_arg
+; EG-CHECK-LABEL: {{^}}v4f32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-CHECK-LABEL: @v4f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v4f32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v8i8_arg
+; EG-CHECK-LABEL: {{^}}v8i8_arg:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
@@ -252,21 +252,21 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v8i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v8i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v8i16_arg
+; EG-CHECK-LABEL: {{^}}v8i16_arg:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
@@ -275,22 +275,22 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v8i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v8i16_arg:
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v8i32_arg
+; EG-CHECK-LABEL: {{^}}v8i32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@@ -299,15 +299,15 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-CHECK-LABEL: @v8i32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; SI-CHECK-LABEL: {{^}}v8i32_arg:
+; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v8f32_arg
+; EG-CHECK-LABEL: {{^}}v8f32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@@ -316,15 +316,15 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-CHECK-LABEL: @v8f32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; SI-CHECK-LABEL: {{^}}v8f32_arg:
+; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v16i8_arg
+; EG-CHECK-LABEL: {{^}}v16i8_arg:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
@@ -341,30 +341,30 @@ entry:
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v16i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v16i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v16i16_arg
+; EG-CHECK-LABEL: {{^}}v16i16_arg:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
@@ -381,30 +381,30 @@ entry:
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v16i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v16i16_arg:
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @v16i32_arg
+; EG-CHECK-LABEL: {{^}}v16i32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@@ -421,15 +421,15 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-CHECK-LABEL: @v16i32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-CHECK-LABEL: {{^}}v16i32_arg:
+; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-CHECK-LABEL: @v16f32_arg
+; EG-CHECK-LABEL: {{^}}v16f32_arg:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@@ -446,10 +446,28 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-CHECK-LABEL: @v16f32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-CHECK-LABEL: {{^}}v16f32_arg:
+; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}kernel_arg_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: buffer_store_dwordx2
+define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
+ store i64 %a, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
+; XSI: s_load_dwordx2
+; XSI: s_load_dwordx2
+; XSI: buffer_store_dwordx2
+; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
+; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
+; ret void
+; }
diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll
index d8be6d40f310..0a5e592ae893 100644
--- a/test/CodeGen/R600/large-alloca.ll
+++ b/test/CodeGen/R600/large-alloca.ll
@@ -1,6 +1,6 @@
; XFAIL: *
; REQUIRES: asserts
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s
define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind {
%large = alloca [8192 x i32], align 4
diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll
index 191b5c3de912..c11e82e76e65 100644
--- a/test/CodeGen/R600/large-constant-initializer.ll
+++ b/test/CodeGen/R600/large-constant-initializer.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI < %s
-; CHECK: S_ENDPGM
+; RUN: llc -march=amdgcn -mcpu=SI < %s
+; CHECK: s_endpgm
@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
diff --git a/test/CodeGen/R600/lds-initializer.ll b/test/CodeGen/R600/lds-initializer.ll
new file mode 100644
index 000000000000..9a209e50ca14
--- /dev/null
+++ b/test/CodeGen/R600/lds-initializer.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported initializer for address space in load_init_lds_global
+
+@lds = addrspace(3) global [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8]
+
+define void @load_init_lds_global(i32 addrspace(1)* %out, i1 %p) {
+ %gep = getelementptr [8 x i32] addrspace(3)* @lds, i32 0, i32 10
+ %ld = load i32 addrspace(3)* %gep
+ store i32 %ld, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/R600/lds-oqap-crash.ll
index 79591506e8cb..fbcd778de2c2 100644
--- a/test/CodeGen/R600/lds-oqap-crash.ll
+++ b/test/CodeGen/R600/lds-oqap-crash.ll
@@ -9,7 +9,7 @@
; because the LDS instructions are pseudo instructions and the OQAP
; reads and writes are bundled together in the same instruction.
-; CHECK: @lds_crash
+; CHECK: {{^}}lds_crash:
define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
entry:
%0 = load i32 addrspace(3)* %in
diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll
index d5dc061964e9..cda75b0e0ccc 100644
--- a/test/CodeGen/R600/lds-output-queue.ll
+++ b/test/CodeGen/R600/lds-output-queue.ll
@@ -3,12 +3,12 @@
; This test checks that the lds input queue will is empty at the end of
; the ALU clause.
-; CHECK-LABEL: @lds_input_queue
+; CHECK-LABEL: {{^}}lds_input_queue:
; CHECK: LDS_READ_RET * OQAP
; CHECK-NOT: ALU clause
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
-@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] [i32 1, i32 2], align 4
+@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] undef, align 4
define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
entry:
@@ -84,7 +84,7 @@ declare void @llvm.AMDGPU.barrier.local()
; analysis, we should be able to keep these instructions sparate before
; scheduling.
;
-; CHECK-LABEL: @local_global_alias
+; CHECK-LABEL: {{^}}local_global_alias:
; CHECK: LDS_READ_RET
; CHECK-NOT: ALU clause
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll
index 9182e2561500..5287723ce191 100644
--- a/test/CodeGen/R600/lds-size.ll
+++ b/test/CodeGen/R600/lds-size.ll
@@ -3,10 +3,10 @@
; This test makes sure we do not double count global values when they are
; used in different basic blocks.
-; CHECK-LABEL: @test
+; CHECK-LABEL: {{^}}test:
; CHECK: .long 166120
; CHECK-NEXT: .long 1
-@lds = internal unnamed_addr addrspace(3) global i32 zeroinitializer, align 4
+@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
define void @test(i32 addrspace(1)* %out, i32 %cond) {
entry:
diff --git a/test/CodeGen/R600/lds-zero-initializer.ll b/test/CodeGen/R600/lds-zero-initializer.ll
new file mode 100644
index 000000000000..87e2c334879b
--- /dev/null
+++ b/test/CodeGen/R600/lds-zero-initializer.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported initializer for address space in load_zeroinit_lds_global
+
+@lds = addrspace(3) global [256 x i32] zeroinitializer
+
+define void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
+ %gep = getelementptr [256 x i32] addrspace(3)* @lds, i32 0, i32 10
+ %ld = load i32 addrspace(3)* %gep
+ store i32 %ld, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
index 1aae7f9f91f4..b9fa8e938ae4 100644
--- a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
+++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
@@ -8,7 +8,7 @@
; instructions, when only one is needed.
;
-; CHECK: @setcc_expand
+; CHECK: {{^}}setcc_expand:
; CHECK: SET
; CHECK-NOT: CND
define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) {
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
index 47191e0a27fb..cff1c24f89d6 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -6,7 +6,7 @@
; or
; ADD_INT literal.x KC0[2].Z, 5
-; CHECK: @i32_literal
+; CHECK: {{^}}i32_literal:
; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5
@@ -23,7 +23,7 @@ entry:
; or
; ADD literal.x KC0[2].Z, 5.0
-; CHECK: @float_literal
+; CHECK: {{^}}float_literal:
; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.0
@@ -35,7 +35,7 @@ entry:
}
; Make sure inline literals are folded into REG_SEQUENCE instructions.
-; CHECK: @inline_literal_reg_sequence
+; CHECK: {{^}}inline_literal_reg_sequence:
; CHECK: MOV {{\** *}}T[[GPR:[0-9]]].X, 0.0
; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Y, 0.0
; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Z, 0.0
@@ -47,7 +47,7 @@ entry:
ret void
}
-; CHECK: @inline_literal_dot4
+; CHECK: {{^}}inline_literal_dot4:
; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
index a0a47b7c4701..f143fd640989 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
@@ -6,10 +6,10 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
; Legacy name
declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
-; FUNC-LABEL: @s_abs_i32
-; SI: S_SUB_I32
-; SI: S_MAX_I32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_abs_i32:
+; SI: s_sub_i32
+; SI: s_max_i32
+; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
@@ -19,10 +19,10 @@ define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
ret void
}
-; FUNC-LABEL: @v_abs_i32
-; SI: V_SUB_I32_e32
-; SI: V_MAX_I32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_abs_i32:
+; SI: v_sub_i32_e32
+; SI: v_max_i32_e32
+; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
@@ -33,10 +33,10 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
ret void
}
-; FUNC-LABEL: @abs_i32_legacy_amdil
-; SI: V_SUB_I32_e32
-; SI: V_MAX_I32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}abs_i32_legacy_amdil:
+; SI: v_sub_i32_e32
+; SI: v_max_i32_e32
+; SI: s_endpgm
; EG: SUB_INT
; EG: MAX_INT
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
index 47f5255e5012..a11d9ae7af08 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
@@ -1,9 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FUNC-LABEL: @test_barrier_global
+; FUNC-LABEL: {{^}}test_barrier_global:
; EG: GROUP_BARRIER
-; SI: S_BARRIER
+; SI: buffer_store_dword
+; SI: s_waitcnt
+; SI: s_barrier
define void @test_barrier_global(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
index 7203675bb47b..76c2453d089f 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -1,9 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FUNC-LABEL: @test_barrier_local
+; FUNC-LABEL: {{^}}test_barrier_local:
; EG: GROUP_BARRIER
-; SI: S_BARRIER
+
+; SI: buffer_store_dword
+; SI: s_waitcnt
+; SI: s_barrier
define void @test_barrier_local(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index eb5094232825..de8b928aa3a0 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
-; FUNC-LABEL: @bfe_i32_arg_arg_arg
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
+; SI: v_bfe_i32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
@@ -13,8 +13,8 @@ define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
ret void
}
-; FUNC-LABEL: @bfe_i32_arg_arg_imm
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
+; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -22,8 +22,8 @@ define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
ret void
}
-; FUNC-LABEL: @bfe_i32_arg_imm_arg
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
+; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -31,8 +31,8 @@ define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
ret void
}
-; FUNC-LABEL: @bfe_i32_imm_arg_arg
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
+; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -40,8 +40,8 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
ret void
}
-; FUNC-LABEL: @v_bfe_print_arg
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
+; FUNC-LABEL: {{^}}v_bfe_print_arg:
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
%load = load i32 addrspace(1)* %src0, align 4
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
@@ -49,9 +49,9 @@ define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) no
ret void
}
-; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -59,9 +59,9 @@ define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
ret void
}
-; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -69,10 +69,10 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
ret void
}
-; FUNC-LABEL: @bfe_i32_test_6
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_6:
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -81,12 +81,12 @@ define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_i32_test_7
-; SI-NOT: SHL
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_7:
+; SI-NOT: shl
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -96,10 +96,10 @@ define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
; FIXME: The shifts should be 1 BFE
-; FUNC-LABEL: @bfe_i32_test_8
-; SI: BUFFER_LOAD_DWORD
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_8:
+; SI: buffer_load_dword
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -108,11 +108,11 @@ define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_i32_test_9
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
@@ -120,11 +120,11 @@ define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_i32_test_10
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
@@ -132,11 +132,11 @@ define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
ret void
}
-; FUNC-LABEL: @bfe_i32_test_11
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
@@ -144,11 +144,11 @@ define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
ret void
}
-; FUNC-LABEL: @bfe_i32_test_12
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
@@ -156,10 +156,10 @@ define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
ret void
}
-; FUNC-LABEL: @bfe_i32_test_13
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_13:
+; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
@@ -167,10 +167,10 @@ define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
}
-; FUNC-LABEL: @bfe_i32_test_14
-; SI-NOT: LSHR
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_14:
+; SI-NOT: lshr
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
@@ -178,11 +178,11 @@ define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_0
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
@@ -190,11 +190,11 @@ define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_1
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
@@ -202,11 +202,11 @@ define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_2
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
@@ -214,11 +214,11 @@ define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_3
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
@@ -226,11 +226,11 @@ define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_4
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -238,11 +238,11 @@ define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_5
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
@@ -250,11 +250,11 @@ define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_6
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
@@ -262,11 +262,11 @@ define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_7
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
@@ -274,11 +274,11 @@ define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_8
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
@@ -286,11 +286,11 @@ define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_9
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -298,11 +298,11 @@ define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_10
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -310,11 +310,11 @@ define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_11
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
@@ -322,11 +322,11 @@ define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_12
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
@@ -334,11 +334,11 @@ define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_13
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -346,11 +346,11 @@ define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_14
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
@@ -358,11 +358,11 @@ define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_15
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
@@ -370,11 +370,11 @@ define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_16
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -382,11 +382,11 @@ define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_17
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
@@ -394,11 +394,11 @@ define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_i32_constant_fold_test_18
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
@@ -408,14 +408,14 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
; XXX - This should really be a single BFE, but the sext_inreg of the
; extended type i24 is never custom lowered.
-; FUNC-LABEL: @bfe_sext_in_reg_i24
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
+; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
; XSI-NOT: SHL
; XSI-NOT: SHR
-; XSI: BUFFER_STORE_DWORD [[BFE]],
+; XSI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
@@ -424,3 +424,18 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
store i32 %ashr, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: @simplify_demanded_bfe_sdiv
+; SI: buffer_load_dword [[LOAD:v[0-9]+]]
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
+; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
+; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
+; SI: buffer_store_dword [[TMP2]]
+define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %src = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
+ %div = sdiv i32 %bfe, 2
+ store i32 %div, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 1a62253eeb74..85bf831534a8 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
-; FUNC-LABEL: @bfe_u32_arg_arg_arg
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -12,8 +12,8 @@ define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
ret void
}
-; FUNC-LABEL: @bfe_u32_arg_arg_imm
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -21,8 +21,8 @@ define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
ret void
}
-; FUNC-LABEL: @bfe_u32_arg_imm_arg
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -30,8 +30,8 @@ define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
ret void
}
-; FUNC-LABEL: @bfe_u32_imm_arg_arg
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
+; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -39,9 +39,9 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
ret void
}
-; FUNC-LABEL: @bfe_u32_arg_0_width_reg_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -49,9 +49,9 @@ define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
ret void
}
-; FUNC-LABEL: @bfe_u32_arg_0_width_imm_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -59,10 +59,10 @@ define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
ret void
}
-; FUNC-LABEL: @bfe_u32_zextload_i8
-; SI: BUFFER_LOAD_UBYTE
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
+; SI: buffer_load_ubyte
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%load = load i8 addrspace(1)* %in
%ext = zext i8 %load to i32
@@ -71,12 +71,12 @@ define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) n
ret void
}
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -86,12 +86,12 @@ define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %i
ret void
}
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i16
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -101,11 +101,11 @@ define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %
ret void
}
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_1
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -115,12 +115,12 @@ define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspa
ret void
}
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_3
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
+; SI-NEXT: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -130,12 +130,12 @@ define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspa
ret void
}
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_7
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
+; SI-NEXT: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -145,11 +145,11 @@ define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspa
ret void
}
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i16_offset_8
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: bfe
+; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%load = load i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
@@ -159,10 +159,10 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp
ret void
}
-; FUNC-LABEL: @bfe_u32_test_1
-; SI: BUFFER_LOAD_DWORD
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_1:
+; SI: buffer_load_dword
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
@@ -187,13 +187,13 @@ define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_u32_test_4
-; SI-NOT: LSHL
-; SI-NOT: SHR
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_4:
+; SI-NOT: lshl
+; SI-NOT: shr
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -203,12 +203,12 @@ define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_u32_test_5
-; SI: BUFFER_LOAD_DWORD
-; SI-NOT: LSHL
-; SI-NOT: SHR
-; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_5:
+; SI: buffer_load_dword
+; SI-NOT: lshl
+; SI-NOT: shr
+; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
+; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -218,10 +218,10 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_u32_test_6
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_6:
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -230,10 +230,10 @@ define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_u32_test_7
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_7:
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -242,11 +242,11 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_u32_test_8
-; SI-NOT: BFE
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_8:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -255,11 +255,11 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_u32_test_9
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
@@ -267,11 +267,11 @@ define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
ret void
}
-; FUNC-LABEL: @bfe_u32_test_10
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
@@ -279,11 +279,11 @@ define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
ret void
}
-; FUNC-LABEL: @bfe_u32_test_11
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
@@ -291,11 +291,11 @@ define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
ret void
}
-; FUNC-LABEL: @bfe_u32_test_12
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
@@ -303,10 +303,10 @@ define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
ret void
}
-; FUNC-LABEL: @bfe_u32_test_13
+; FUNC-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
@@ -314,10 +314,10 @@ define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
}
-; FUNC-LABEL: @bfe_u32_test_14
-; SI-NOT: LSHR
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_14:
+; SI-NOT: lshr
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
@@ -325,11 +325,11 @@ define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_0
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
@@ -337,11 +337,11 @@ define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_1
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
@@ -349,11 +349,11 @@ define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_2
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
@@ -361,11 +361,11 @@ define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_3
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
@@ -373,11 +373,11 @@ define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_4
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -385,11 +385,11 @@ define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_5
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
@@ -397,11 +397,11 @@ define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_6
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
@@ -409,11 +409,11 @@ define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_7
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
@@ -421,11 +421,11 @@ define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_8
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
@@ -433,11 +433,11 @@ define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_9
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFEfppppppppppppp
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -445,11 +445,11 @@ define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_10
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -457,11 +457,11 @@ define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_11
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
@@ -469,11 +469,11 @@ define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_12
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
@@ -481,11 +481,11 @@ define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_13
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -493,11 +493,11 @@ define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_14
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
@@ -505,11 +505,11 @@ define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_15
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
@@ -517,11 +517,11 @@ define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_16
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -529,11 +529,11 @@ define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_17
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
@@ -541,14 +541,36 @@ define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
ret void
}
-; FUNC-LABEL: @bfe_u32_constant_fold_test_18
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
ret void
}
+
+; Make sure that SimplifyDemandedBits doesn't cause the and to be
+; reduced to the bits demanded by the bfe.
+
+; XXX: The operand to v_bfe_u32 could also just directly be the load register.
+; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
+; SI: buffer_load_dword [[ARG:v[0-9]+]]
+; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
+; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
+; SI-DAG: buffer_store_dword [[AND]]
+; SI-DAG: buffer_store_dword [[BFE]]
+; SI: s_endpgm
+define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
+ i32 addrspace(1)* %out1,
+ i32 addrspace(1)* %in) nounwind {
+ %src = load i32 addrspace(1)* %in, align 4
+ %and = and i32 %src, 63
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
+ store i32 %and, i32 addrspace(1)* %out1, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll b/test/CodeGen/R600/llvm.AMDGPU.bfi.ll
index e1de45b4ba29..b3da24657f49 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfi.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone
-; FUNC-LABEL: @bfi_arg_arg_arg
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_arg_arg_arg:
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -12,8 +12,8 @@ define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %
ret void
}
-; FUNC-LABEL: @bfi_arg_arg_imm
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_arg_arg_imm:
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -21,8 +21,8 @@ define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounw
ret void
}
-; FUNC-LABEL: @bfi_arg_imm_arg
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_arg_imm_arg:
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -30,8 +30,8 @@ define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounw
ret void
}
-; FUNC-LABEL: @bfi_imm_arg_arg
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_imm_arg_arg:
+; SI: v_bfi_b32
; EG: BFI_INT
define void @bfi_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
%bfi = call i32 @llvm.AMDGPU.bfi(i32 123, i32 %src1, i32 %src2) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
index ef8721e4a978..80aecc743182 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
-; FUNC-LABEL: @bfm_arg_arg
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_arg_arg:
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
@@ -12,8 +12,8 @@ define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind
ret void
}
-; FUNC-LABEL: @bfm_arg_imm
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_arg_imm:
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
@@ -21,8 +21,8 @@ define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
ret void
}
-; FUNC-LABEL: @bfm_imm_arg
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_imm_arg:
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
@@ -30,8 +30,8 @@ define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
ret void
}
-; FUNC-LABEL: @bfm_imm_imm
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_imm_imm:
+; SI: v_bfm
; EG: BFM_INT
define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
index 68a5ad0649c2..dd4459be536b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.brev.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
@@ -1,24 +1,24 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone
-; FUNC-LABEL: @s_brev_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_brev_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @v_brev_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_brev_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
index d608953a0dd2..b9d5f6fbac07 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
@@ -1,14 +1,15 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+declare float @llvm.fabs.f32(float) nounwind readnone
declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone
declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone
-; FUNC-LABEL: @clamp_0_1_f32
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}clamp_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
; EG: MOV_SAT
define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -17,10 +18,47 @@ define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
ret void
}
-; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; FUNC-LABEL: {{^}}clamp_fabs_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, |[[ARG]]| clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @clamp_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+ %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone
+ %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fabs, float 0.0, float 1.0) nounwind readnone
+ store float %clamp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_fneg_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -[[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @clamp_fneg_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+ %src.fneg = fsub float -0.0, %src
+ %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fneg, float 0.0, float 1.0) nounwind readnone
+ store float %clamp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_fneg_fabs_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -|[[ARG]]| clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @clamp_fneg_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+ %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone
+ %src.fneg.fabs = fsub float -0.0, %src.fabs
+ %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fneg.fabs, float 0.0, float 1.0) nounwind readnone
+ store float %clamp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_0_1_amdil_legacy_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
%clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone
store float %clamp, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.class.ll b/test/CodeGen/R600/llvm.AMDGPU.class.ll
new file mode 100644
index 000000000000..974e3c71e622
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.class.ll
@@ -0,0 +1,497 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
+declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
+declare i32 @llvm.r600.read.tidig.x() #1
+declare float @llvm.fabs.f32(float) #1
+declare double @llvm.fabs.f64(double) #1
+
+; SI-LABEL: {{^}}test_class_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_fabs_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+ %a.fabs = call float @llvm.fabs.f32(float %a) #1
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a.fabs, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+ %a.fneg = fsub float -0.0, %a
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_fabs_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+ %a.fabs = call float @llvm.fabs.f32(float %a) #1
+ %a.fneg.fabs = fsub float -0.0, %a.fabs
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg.fabs, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_1_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_64_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 64{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Set all 10 bits of mask
+; SI-LABEL: {{^}}test_class_full_mask_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1023) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_9bit_mask_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}v_test_class_full_mask_f32:
+; SI-DAG: buffer_load_dword [[VA:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, [[VA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load float addrspace(1)* %gep.in
+
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f32:
+; SI-DAG: buffer_load_dword [[VB:v[0-9]+]]
+; SI: v_cmp_class_f32_e32 vcc, 1.0, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %b = load i32 addrspace(1)* %gep.in
+
+ %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+ ret void
+}
+
+; FIXME: Why isn't this using a literal constant operand?
+; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f32:
+; SI-DAG: buffer_load_dword [[VB:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; SI: v_cmp_class_f32_e32 vcc, [[VK]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %b = load i32 addrspace(1)* %gep.in
+
+ %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_fabs_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+ %a.fabs = call double @llvm.fabs.f64(double %a) #1
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a.fabs, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+ %a.fneg = fsub double -0.0, %a
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_fabs_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+ %a.fabs = call double @llvm.fabs.f64(double %a) #1
+ %a.fneg.fabs = fsub double -0.0, %a.fabs
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg.fabs, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_1_f64:
+; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
+; SI: s_endpgm
+define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 1) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_64_f64:
+; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
+; SI: s_endpgm
+define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 64) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Set all 9 bits of mask
+; SI-LABEL: {{^}}test_class_full_mask_f64:
+; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}v_test_class_full_mask_f64:
+; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load double addrspace(1)* %in
+
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f64:
+; XSI: v_cmp_class_f64_e32 vcc, 1.0,
+; SI: v_cmp_class_f64_e32 vcc,
+; SI: s_endpgm
+define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %b = load i32 addrspace(1)* %gep.in
+
+ %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64:
+; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
+; SI: s_endpgm
+define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %b = load i32 addrspace(1)* %gep.in
+
+ %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_class_f32_0:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load float addrspace(1)* %gep.in
+
+ %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
+ %or = or i1 %class0, %class1
+
+ %sext = sext i1 %or to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or3_class_f32_0:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load float addrspace(1)* %gep.in
+
+ %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
+ %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+ %or.0 = or i1 %class0, %class1
+ %or.1 = or i1 %or.0, %class2
+
+ %sext = sext i1 %or.1 to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_all_tests_class_f32_0:
+; SI-NOT: v_cmp_class
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, v{{[0-9]+}}, [[MASK]]{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load float addrspace(1)* %gep.in
+
+ %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+ %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
+ %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+ %class3 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
+ %class4 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 16) #1
+ %class5 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 32) #1
+ %class6 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
+ %class7 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 128) #1
+ %class8 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 256) #1
+ %class9 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 512) #1
+ %or.0 = or i1 %class0, %class1
+ %or.1 = or i1 %or.0, %class2
+ %or.2 = or i1 %or.1, %class3
+ %or.3 = or i1 %or.2, %class4
+ %or.4 = or i1 %or.3, %class5
+ %or.5 = or i1 %or.4, %class6
+ %or.6 = or i1 %or.5, %class7
+ %or.7 = or i1 %or.6, %class8
+ %or.8 = or i1 %or.7, %class9
+ %sext = sext i1 %or.8 to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_class_f32_1:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load float addrspace(1)* %gep.in
+
+ %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+ %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
+ %or = or i1 %class0, %class1
+
+ %sext = sext i1 %or to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_class_f32_2:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load float addrspace(1)* %gep.in
+
+ %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
+ %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
+ %or = or i1 %class0, %class1
+
+ %sext = sext i1 %or to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_no_fold_or_class_f32_0:
+; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 4{{$}}
+; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}}
+; SI: s_or_b64
+; SI: s_endpgm
+define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load float addrspace(1)* %gep.in
+
+ %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+ %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
+ %or = or i1 %class0, %class1
+
+ %sext = sext i1 %or to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_0_f32:
+; SI-NOT: v_cmp_class
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_class_0_f64:
+; SI-NOT: v_cmp_class
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
+ %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 0) #1
+ %sext = sext i1 %result to i32
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
index 110bbfde68b9..aa07afdebea6 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cube.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: @cube
+; CHECK: {{^}}cube:
; CHECK: CUBE T{{[0-9]}}.X
; CHECK: CUBE T{{[0-9]}}.Y
; CHECK: CUBE T{{[0-9]}}.Z
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
index 6facb4782e98..4d187d5193c5 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
declare float @llvm.AMDGPU.cvt.f32.ubyte0(i32) nounwind readnone
declare float @llvm.AMDGPU.cvt.f32.ubyte1(i32) nounwind readnone
declare float @llvm.AMDGPU.cvt.f32.ubyte2(i32) nounwind readnone
declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
-; SI-LABEL: @test_unpack_byte0_to_float:
-; SI: V_CVT_F32_UBYTE0
+; SI-LABEL: {{^}}test_unpack_byte0_to_float:
+; SI: v_cvt_f32_ubyte0
define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
@@ -14,8 +14,8 @@ define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(
ret void
}
-; SI-LABEL: @test_unpack_byte1_to_float:
-; SI: V_CVT_F32_UBYTE1
+; SI-LABEL: {{^}}test_unpack_byte1_to_float:
+; SI: v_cvt_f32_ubyte1
define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
@@ -23,8 +23,8 @@ define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(
ret void
}
-; SI-LABEL: @test_unpack_byte2_to_float:
-; SI: V_CVT_F32_UBYTE2
+; SI-LABEL: {{^}}test_unpack_byte2_to_float:
+; SI: v_cvt_f32_ubyte2
define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
@@ -32,8 +32,8 @@ define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(
ret void
}
-; SI-LABEL: @test_unpack_byte3_to_float:
-; SI: V_CVT_F32_UBYTE3
+; SI-LABEL: {{^}}test_unpack_byte3_to_float:
+; SI: v_cvt_f32_ubyte3
define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
index c8c73573e073..52d0519ef277 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
@@ -1,25 +1,25 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
-; SI-LABEL: @test_div_fixup_f32:
-; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}test_div_fixup_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: @test_div_fixup_f64:
-; SI: V_DIV_FIXUP_F64
+; SI-LABEL: {{^}}test_div_fixup_f64:
+; SI: v_div_fixup_f64
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
%result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
index 4f1e827c2cbd..2c9085e926dd 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -1,27 +1,27 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone
-declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone
+declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
+declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
-; SI-LABEL: @test_div_fmas_f32:
-; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
- %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone
+; SI-LABEL: {{^}}test_div_fmas_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
+ %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: @test_div_fmas_f64:
-; SI: V_DIV_FMAS_F64
-define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
- %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone
+; SI-LABEL: {{^}}test_div_fmas_f64:
+; SI: v_div_fmas_f64
+define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
+ %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
index 527c8da10a3c..32e6eaa1a7a9 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -1,13 +1,23 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
; SI-LABEL @test_div_scale_f32_1:
-; SI: V_DIV_SCALE_F32
-define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
@@ -15,10 +25,19 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)*
}
; SI-LABEL @test_div_scale_f32_2:
-; SI: V_DIV_SCALE_F32
-define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, float addrspace(1)* %out, align 4
@@ -26,10 +45,19 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)*
}
; SI-LABEL @test_div_scale_f64_1:
-; SI: V_DIV_SCALE_F64
-define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
- %a = load double addrspace(1)* %aptr, align 8
- %b = load double addrspace(1)* %bptr, align 8
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
@@ -37,10 +65,221 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)
}
; SI-LABEL @test_div_scale_f64_1:
-; SI: V_DIV_SCALE_F64
-define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
- %a = load double addrspace(1)* %aptr, align 8
- %b = load double addrspace(1)* %bptr, align 8
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result0 = extractvalue { double, i1 } %result, 0
+ store double %result0, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_num_1:
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: s_load_dword [[A:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+ %b = load float addrspace(1)* %gep, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_num_2:
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: s_load_dword [[A:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+ %b = load float addrspace(1)* %gep, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_den_1:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: s_load_dword [[B:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+ %a = load float addrspace(1)* %gep, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_den_2:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: s_load_dword [[B:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+ %a = load float addrspace(1)* %gep, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_num_1:
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+ %b = load double addrspace(1)* %gep, align 8
+
+ %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result0 = extractvalue { double, i1 } %result, 0
+ store double %result0, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_num_2:
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+ %b = load double addrspace(1)* %gep, align 8
+
+ %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result0 = extractvalue { double, i1 } %result, 0
+ store double %result0, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_den_1:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+ %a = load double addrspace(1)* %gep, align 8
+
+ %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result0 = extractvalue { double, i1 } %result, 0
+ store double %result0, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_den_2:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+ %a = load double addrspace(1)* %gep, align 8
+
+ %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+ %result0 = extractvalue { double, i1 } %result, 0
+ store double %result0, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_all_scalar_1:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_all_scalar_2:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f64_all_scalar_1:
+; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
+; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
+ %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+ %result0 = extractvalue { double, i1 } %result, 0
+ store double %result0, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f64_all_scalar_2:
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
+; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
index 72ec1c57571e..df43b0d9063d 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
@@ -6,8 +6,8 @@ declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
; Legacy name
declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
-; FUNC-LABEL: @fract_f32
-; SI: V_FRACT_F32
+; FUNC-LABEL: {{^}}fract_f32:
+; SI: v_fract_f32
; EG: FRACT
define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
%val = load float addrspace(1)* %src, align 4
@@ -16,8 +16,8 @@ define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounw
ret void
}
-; FUNC-LABEL: @fract_f32_legacy_amdil
-; SI: V_FRACT_F32
+; FUNC-LABEL: {{^}}fract_f32_legacy_amdil:
+; SI: v_fract_f32
; EG: FRACT
define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
%val = load float addrspace(1)* %src, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
index 95795ea63b93..26a370436fd3 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@@ -8,8 +8,8 @@
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
-; FUNC-LABEL: @test_imad24
-; SI: V_MAD_I32_I24
+; FUNC-LABEL: {{^}}test_imad24:
+; SI: v_mad_i32_i24
; CM: MULADD_INT24
; R600: MULLO_INT
; R600: ADD_INT
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
index 01c9f435b9fc..ec8f001a1a63 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
-; SI-LABEL: @vector_imax
-; SI: V_MAX_I32_e32
+; SI-LABEL: {{^}}vector_imax:
+; SI: v_max_i32_e32
define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
ret void
}
-; SI-LABEL: @scalar_imax
-; SI: S_MAX_I32
+; SI-LABEL: {{^}}scalar_imax:
+; SI: s_max_i32
define void @scalar_imax(i32 %p0, i32 %p1) #0 {
entry:
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
@@ -29,4 +29,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
index 565bf3444081..07a0fe78e2a7 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
-; SI-LABEL: @vector_imin
-; SI: V_MIN_I32_e32
+; SI-LABEL: {{^}}vector_imin:
+; SI: v_min_i32_e32
define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
ret void
}
-; SI-LABEL: @scalar_imin
-; SI: S_MIN_I32
+; SI-LABEL: {{^}}scalar_imin:
+; SI: s_min_i32
define void @scalar_imin(i32 %p0, i32 %p1) #0 {
entry:
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
@@ -29,4 +29,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
index 8ee3520daeae..1a9806d4b97a 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
-; FUNC-LABEL: @test_imul24
-; SI: V_MUL_I32_I24
+; FUNC-LABEL: {{^}}test_imul24:
+; SI: v_mul_i32_i24
; CM: MUL_INT24
; R600: MULLO_INT
define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
index 1f82ffb53f1d..3995c9a742d6 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.kill.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @kill_gs_const
-; SI-NOT: V_CMPX_LE_F32
-; SI: S_MOV_B64 exec, 0
+; SI-LABEL: {{^}}kill_gs_const:
+; SI-NOT: v_cmpx_le_f32
+; SI: s_mov_b64 exec, 0
define void @kill_gs_const() #0 {
main_body:
@@ -19,4 +19,4 @@ declare void @llvm.AMDGPU.kill(float)
attributes #0 = { "ShaderType"="2" }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll
new file mode 100644
index 000000000000..f0b8dace17ff
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone
+declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
+
+; SI-LABEL: {{^}}test_ldexp_f32:
+; SI: v_ldexp_f32
+; SI: s_endpgm
+define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
+ %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_ldexp_f64:
+; SI: v_ldexp_f64
+; SI: s_endpgm
+define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
+ %result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
index 51964eefa64f..4cafd563685e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone
-; FUNC-LABEL: @rsq_legacy_f32
-; SI: V_RSQ_LEGACY_F32_e32
+; FUNC-LABEL: {{^}}rsq_legacy_f32:
+; SI: v_rsq_legacy_f32_e32
; EG: RECIPSQRT_IEEE
define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq = call float @llvm.AMDGPU.legacy.rsq(float %src) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
index b5dda0ce81f9..68412950b05a 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
@@ -1,27 +1,27 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
declare double @llvm.sqrt.f64(double) nounwind readnone
-; FUNC-LABEL: @rcp_f64
-; SI: V_RCP_F64_e32
+; FUNC-LABEL: {{^}}rcp_f64:
+; SI: v_rcp_f64_e32
define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
%rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
store double %rcp, double addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @rcp_pat_f64
-; SI: V_RCP_F64_e32
+; FUNC-LABEL: {{^}}rcp_pat_f64:
+; SI: v_rcp_f64_e32
define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
%rcp = fdiv double 1.0, %src
store double %rcp, double addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @rsq_rcp_pat_f64
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE-NOT: V_RSQ_F64_e32
+; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE-NOT: v_rsq_f64_e32
define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
%sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
%rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
index 8d5d66e149ba..4979a14ccdfc 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
@@ -1,65 +1,47 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
-
declare float @llvm.sqrt.f32(float) nounwind readnone
-declare double @llvm.sqrt.f64(double) nounwind readnone
-; FUNC-LABEL: @rcp_f32
-; SI: V_RCP_F32_e32
+; FUNC-LABEL: {{^}}rcp_f32:
+; SI: v_rcp_f32_e32
+; EG: RECIP_IEEE
define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
%rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @rcp_f64
-; SI: V_RCP_F64_e32
-define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
- %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
+; FIXME: Evergreen only ever does unsafe fp math.
+; FUNC-LABEL: {{^}}rcp_pat_f32:
+
+; SI-SAFE: v_rcp_f32_e32
+; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
+
+; EG: RECIP_IEEE
-; FUNC-LABEL: @rcp_pat_f32
-; SI-SAFE: V_RCP_F32_e32
-; XSI-SAFE-SPDENORM-NOT: V_RCP_F32_e32
define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
%rcp = fdiv float 1.0, %src
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @rcp_pat_f64
-; SI: V_RCP_F64_e32
-define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
- %rcp = fdiv double 1.0, %src
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
+; FUNC-LABEL: {{^}}rsq_rcp_pat_f32:
+; SI-UNSAFE: v_rsq_f32_e32
+; SI-SAFE: v_sqrt_f32_e32
+; SI-SAFE: v_rcp_f32_e32
-; FUNC-LABEL: @rsq_rcp_pat_f32
-; SI-UNSAFE: V_RSQ_F32_e32
-; SI-SAFE: V_SQRT_F32_e32
-; SI-SAFE: V_RCP_F32_e32
+; EG: RECIPSQRT_IEEE
define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
%sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
%rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
-
-; FUNC-LABEL: @rsq_rcp_pat_f64
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE-NOT: V_RSQ_F64_e32
-define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
- %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
- %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
- store double %rcp, double addrspace(1)* %out, align 8
- ret void
-}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
index 100d6ff77707..4318aeaac786 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
-; FUNC-LABEL: @rsq_clamped_f64
-; SI: V_RSQ_CLAMP_F64_e32
+; FUNC-LABEL: {{^}}rsq_clamped_f64:
+; SI: v_rsq_clamp_f64_e32
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
%rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
store double %rsq_clamped, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
index 683df7355ac6..9336baffc97f 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
-; FUNC-LABEL: @rsq_clamped_f32
-; SI: V_RSQ_CLAMP_F32_e32
+; FUNC-LABEL: {{^}}rsq_clamped_f32:
+; SI: v_rsq_clamp_f32_e32
; EG: RECIPSQRT_CLAMPED
define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
index 27cf6b28fd66..f987ef3995de 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
@@ -1,13 +1,32 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
-; FUNC-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
+; FUNC-LABEL: {{^}}rsq_f32:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; EG: RECIPSQRT_IEEE
define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+ %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+ %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
index 1c736d447ea9..0b1342ee45ce 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
-; SI-LABEL: @test_trig_preop_f64:
-; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]]
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}test_trig_preop_f64:
+; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]]
+; SI-DAG: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load double addrspace(1)* %aptr, align 8
%b = load i32 addrspace(1)* %bptr, align 4
@@ -16,11 +16,11 @@ define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)*
ret void
}
-; SI-LABEL: @test_trig_preop_f64_imm_segment:
-; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}test_trig_preop_f64_imm_segment:
+; SI: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
%a = load double addrspace(1)* %aptr, align 8
%result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
index e6bb2c4e9455..9c5c74e23315 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-; R600-CHECK: @amdgpu_trunc
+; R600-CHECK: {{^}}amdgpu_trunc:
; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: @amdgpu_trunc
-; SI-CHECK: V_TRUNC_F32
+; SI-CHECK: {{^}}amdgpu_trunc:
+; SI-CHECK: v_trunc_f32
define void @amdgpu_trunc(float addrspace(1)* %out, float %x) {
entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index afdfb18a563b..88613db2161f 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -1,13 +1,14 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @test_umad24
-; SI: V_MAD_U32_U24
+; FUNC-LABEL: {{^}}test_umad24:
+; SI: v_mad_u32_u24
; EG: MULADD_UINT24
; R600: MULLO_UINT
; R600: ADD_INT
@@ -17,3 +18,21 @@ define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2
ret void
}
+; FUNC-LABEL: {{^}}commute_umad24:
+; SI-DAG: buffer_load_dword [[SRC0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mad_u32_u24 [[RESULT:v[0-9]+]], 4, [[SRC0]], [[SRC2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %src0.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %src2.gep = getelementptr i32 addrspace(1)* %src0.gep, i32 1
+
+ %src0 = load i32 addrspace(1)* %src0.gep, align 4
+ %src2 = load i32 addrspace(1)* %src2.gep, align 4
+ %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
+ store i32 %mad, i32 addrspace(1)* %out.gep, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
index 1b8da2e15534..094ecaca2b4c 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
-; SI-LABEL: @vector_umax
-; SI: V_MAX_U32_e32
+; SI-LABEL: {{^}}vector_umax:
+; SI: v_max_u32_e32
define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
ret void
}
-; SI-LABEL: @scalar_umax
-; SI: S_MAX_U32
+; SI-LABEL: {{^}}scalar_umax:
+; SI: s_max_u32
define void @scalar_umax(i32 %p0, i32 %p1) #0 {
entry:
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
@@ -21,11 +21,11 @@ entry:
ret void
}
-; SI-LABEL: @trunc_zext_umax
-; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
-; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
-; SI-NOT: AND
-; SI: BUFFER_STORE_SHORT [[RESULT]],
+; SI-LABEL: {{^}}trunc_zext_umax:
+; SI: buffer_load_ubyte [[VREG:v[0-9]+]],
+; SI: v_max_u32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: and
+; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
@@ -44,4 +44,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
index 08397f8356c9..97fc40a72339 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
-; SI-LABEL: @vector_umin
-; SI: V_MIN_U32_e32
+; SI-LABEL: {{^}}vector_umin:
+; SI: v_min_u32_e32
define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
ret void
}
-; SI-LABEL: @scalar_umin
-; SI: S_MIN_U32
+; SI-LABEL: {{^}}scalar_umin:
+; SI: s_min_u32
define void @scalar_umin(i32 %p0, i32 %p1) #0 {
entry:
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
@@ -21,11 +21,11 @@ entry:
ret void
}
-; SI-LABEL: @trunc_zext_umin
-; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
-; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
-; SI-NOT: AND
-; SI: BUFFER_STORE_SHORT [[RESULT]],
+; SI-LABEL: {{^}}trunc_zext_umin:
+; SI: buffer_load_ubyte [[VREG:v[0-9]+]],
+; SI: v_min_u32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: and
+; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
@@ -44,4 +44,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
index 72a36029fb31..5a849c224d22 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@@ -6,8 +6,8 @@
declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone
-; FUNC-LABEL: @test_umul24
-; SI: V_MUL_U32_U24
+; FUNC-LABEL: {{^}}test_umul24:
+; SI: v_mul_u32_u24
; R600: MUL_UINT24
; R600: MULLO_UINT
define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
index 0438eccc8862..a7454ef34937 100644
--- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: S_MOV_B32
-;CHECK-NEXT: V_INTERP_MOV_F32
+;CHECK: s_mov_b32
+;CHECK-NEXT: v_interp_mov_f32
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
main_body:
diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll
index 8402faaa4dca..cdf34ca3e7ff 100644
--- a/test/CodeGen/R600/llvm.SI.gather4.ll
+++ b/test/CodeGen/R600/llvm.SI.gather4.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-LABEL: @gather4_v2
-;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_v2:
+;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_v2() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4
-;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4:
+;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_cl
-;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_cl:
+;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -39,8 +39,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_l
-;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_l:
+;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -52,8 +52,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_b
-;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b:
+;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -65,8 +65,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_b_cl
-;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_cl:
+;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -78,8 +78,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_b_cl_v8
-;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_cl_v8:
+;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_cl_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -91,8 +91,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_lz_v2
-;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_lz_v2:
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_lz_v2() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -104,8 +104,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_lz
-;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_lz:
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -119,8 +119,8 @@ main_body:
-;CHECK-LABEL: @gather4_o
-;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_o:
+;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -132,8 +132,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_cl_o
-;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_cl_o:
+;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -145,8 +145,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_cl_o_v8
-;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_cl_o_v8:
+;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_cl_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -158,8 +158,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_l_o
-;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_l_o:
+;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_l_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -171,8 +171,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_l_o_v8
-;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_l_o_v8:
+;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_l_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -184,8 +184,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_b_o
-;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_o:
+;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -197,8 +197,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_b_o_v8
-;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_o_v8:
+;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -210,8 +210,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_b_cl_o
-;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_cl_o:
+;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_b_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -223,8 +223,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_lz_o
-;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_lz_o:
+;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_lz_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -238,8 +238,8 @@ main_body:
-;CHECK-LABEL: @gather4_c
-;CHECK: IMAGE_GATHER4_C {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c:
+;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -251,8 +251,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_cl
-;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_cl:
+;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -264,8 +264,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_cl_v8
-;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_cl_v8:
+;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_cl_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -277,8 +277,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_l
-;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_l:
+;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -290,8 +290,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_l_v8
-;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_l_v8:
+;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_l_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -303,8 +303,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_b
-;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b:
+;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -316,8 +316,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_b_v8
-;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_v8:
+;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -329,8 +329,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_b_cl
-;CHECK: IMAGE_GATHER4_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_cl:
+;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -342,8 +342,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_lz
-;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_lz:
+;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -357,8 +357,8 @@ main_body:
-;CHECK-LABEL: @gather4_c_o
-;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_o:
+;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -370,8 +370,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_o_v8
-;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_o_v8:
+;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -383,8 +383,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_cl_o
-;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_cl_o:
+;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -396,8 +396,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_l_o
-;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_l_o:
+;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_l_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -409,8 +409,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_b_o
-;CHECK: IMAGE_GATHER4_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_o:
+;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -422,8 +422,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_b_cl_o
-;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
+;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_b_cl_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -435,8 +435,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_lz_o
-;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_lz_o:
+;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_lz_o() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -448,8 +448,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @gather4_c_lz_o_v8
-;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
+;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @gather4_c_lz_o_v8() #0 {
main_body:
%r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll
index a7a17ec3fffa..775dd3cd7569 100644
--- a/test/CodeGen/R600/llvm.SI.getlod.ll
+++ b/test/CodeGen/R600/llvm.SI.getlod.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-LABEL: @getlod
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getlod:
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getlod() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -11,8 +11,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @getlod_v2
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getlod_v2:
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getlod_v2() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -22,8 +22,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @getlod_v4
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getlod_v4:
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getlod_v4() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.image.ll b/test/CodeGen/R600/llvm.SI.image.ll
index eac0b8eead3a..7c9af7b40f67 100644
--- a/test/CodeGen/R600/llvm.SI.image.ll
+++ b/test/CodeGen/R600/llvm.SI.image.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-LABEL: @image_load
-;CHECK: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}image_load:
+;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @image_load() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @image_load_mip
-;CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}image_load_mip:
+;CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @image_load_mip() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @getresinfo
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getresinfo:
+;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define void @getresinfo() #0 {
main_body:
%r = call <4 x float> @llvm.SI.getresinfo.i32(i32 undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.image.sample.ll b/test/CodeGen/R600/llvm.SI.image.sample.ll
index 14dff7eb5fea..779c8cc573b8 100644
--- a/test/CodeGen/R600/llvm.SI.image.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.image.sample.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-LABEL: @sample
-;CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample:
+;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_cl
-;CHECK: IMAGE_SAMPLE_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cl:
+;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_d
-;CHECK: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d:
+;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -39,8 +39,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_d_cl
-;CHECK: IMAGE_SAMPLE_D_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d_cl:
+;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -52,8 +52,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_l
-;CHECK: IMAGE_SAMPLE_L {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_l:
+;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -65,8 +65,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_b
-;CHECK: IMAGE_SAMPLE_B {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b:
+;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -78,8 +78,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_b_cl
-;CHECK: IMAGE_SAMPLE_B_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b_cl:
+;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -91,8 +91,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_lz
-;CHECK: IMAGE_SAMPLE_LZ {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_lz:
+;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -104,8 +104,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_cd
-;CHECK: IMAGE_SAMPLE_CD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd:
+;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -117,8 +117,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_cd_cl
-;CHECK: IMAGE_SAMPLE_CD_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd_cl:
+;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -130,8 +130,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c
-;CHECK: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c:
+;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -143,8 +143,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_cl
-;CHECK: IMAGE_SAMPLE_C_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cl:
+;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -156,8 +156,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_d
-;CHECK: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d:
+;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -169,8 +169,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_d_cl
-;CHECK: IMAGE_SAMPLE_C_D_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d_cl:
+;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -182,8 +182,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_l
-;CHECK: IMAGE_SAMPLE_C_L {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_l:
+;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -195,8 +195,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_b
-;CHECK: IMAGE_SAMPLE_C_B {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b:
+;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -208,8 +208,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_b_cl
-;CHECK: IMAGE_SAMPLE_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b_cl:
+;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -221,8 +221,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_lz
-;CHECK: IMAGE_SAMPLE_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_lz:
+;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -234,8 +234,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_cd
-;CHECK: IMAGE_SAMPLE_C_CD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd:
+;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -247,8 +247,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_cd_cl
-;CHECK: IMAGE_SAMPLE_C_CD_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd_cl:
+;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.image.sample.o.ll b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
index ed3ef9140143..7bfb5501206c 100644
--- a/test/CodeGen/R600/llvm.SI.image.sample.o.ll
+++ b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-LABEL: @sample
-;CHECK: IMAGE_SAMPLE_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample:
+;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_cl
-;CHECK: IMAGE_SAMPLE_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cl:
+;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_d
-;CHECK: IMAGE_SAMPLE_D_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d:
+;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -39,8 +39,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_d_cl
-;CHECK: IMAGE_SAMPLE_D_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d_cl:
+;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -52,8 +52,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_l
-;CHECK: IMAGE_SAMPLE_L_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_l:
+;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -65,8 +65,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_b
-;CHECK: IMAGE_SAMPLE_B_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b:
+;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -78,8 +78,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_b_cl
-;CHECK: IMAGE_SAMPLE_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b_cl:
+;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -91,8 +91,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_lz
-;CHECK: IMAGE_SAMPLE_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_lz:
+;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -104,8 +104,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_cd
-;CHECK: IMAGE_SAMPLE_CD_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd:
+;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -117,8 +117,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_cd_cl
-;CHECK: IMAGE_SAMPLE_CD_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd_cl:
+;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -130,8 +130,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c
-;CHECK: IMAGE_SAMPLE_C_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c:
+;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -143,8 +143,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_cl
-;CHECK: IMAGE_SAMPLE_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cl:
+;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -156,8 +156,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_d
-;CHECK: IMAGE_SAMPLE_C_D_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d:
+;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -169,8 +169,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_d_cl
-;CHECK: IMAGE_SAMPLE_C_D_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d_cl:
+;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_d_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -182,8 +182,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_l
-;CHECK: IMAGE_SAMPLE_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_l:
+;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_l() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -195,8 +195,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_b
-;CHECK: IMAGE_SAMPLE_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b:
+;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -208,8 +208,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_b_cl
-;CHECK: IMAGE_SAMPLE_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b_cl:
+;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_b_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -221,8 +221,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_lz
-;CHECK: IMAGE_SAMPLE_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_lz:
+;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_lz() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -234,8 +234,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_cd
-;CHECK: IMAGE_SAMPLE_C_CD_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd:
+;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -247,8 +247,8 @@ main_body:
ret void
}
-;CHECK-LABEL: @sample_c_cd_cl
-;CHECK: IMAGE_SAMPLE_C_CD_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd_cl:
+;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define void @sample_c_cd_cl() #0 {
main_body:
%r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll
index 59e00f01c96b..dba6e8f3f0c7 100644
--- a/test/CodeGen/R600/llvm.SI.imageload.ll
+++ b/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -1,15 +1,15 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-DAG: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, -1
+;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 2, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 1, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 4, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, -1
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
@@ -84,7 +84,7 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
; Test that ccordinates are stored in vgprs and not sgprs
; CHECK: vgpr_coords
-; CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0
@@ -126,6 +126,6 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
-!0 = metadata !{metadata !"const", null}
-!1 = metadata !{}
-!2 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
+!0 = !{!"const", null}
+!1 = !{}
+!2 = !{!0, !0, i64 0, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
index a6227755b72e..ebd16e376302 100644
--- a/test/CodeGen/R600/llvm.SI.load.dword.ll
+++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -1,13 +1,13 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
; Example of a simple geometry shader loading vertex attributes from the
; ESGS ring buffer
-; CHECK-LABEL: @main
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
+; CHECK-LABEL: {{^}}main:
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
main_body:
@@ -37,4 +37,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readonly }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.resinfo.ll b/test/CodeGen/R600/llvm.SI.resinfo.ll
index af3afc1e1d92..278e05558c6e 100644
--- a/test/CodeGen/R600/llvm.SI.resinfo.ll
+++ b/test/CodeGen/R600/llvm.SI.resinfo.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 2, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 1, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 4, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, -1
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll b/test/CodeGen/R600/llvm.SI.sample-masked.ll
index 445359a4ced7..071938f2c249 100644
--- a/test/CodeGen/R600/llvm.SI.sample-masked.ll
+++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
-; CHECK-LABEL: @v1
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 13
+; CHECK-LABEL: {{^}}v1:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
define void @v1(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -13,8 +13,8 @@ entry:
ret void
}
-; CHECK-LABEL: @v2
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 11
+; CHECK-LABEL: {{^}}v2:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
define void @v2(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -26,8 +26,8 @@ entry:
ret void
}
-; CHECK-LABEL: @v3
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
+; CHECK-LABEL: {{^}}v3:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
define void @v3(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -39,8 +39,8 @@ entry:
ret void
}
-; CHECK-LABEL: @v4
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 7
+; CHECK-LABEL: {{^}}v4:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
define void @v4(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -52,8 +52,8 @@ entry:
ret void
}
-; CHECK-LABEL: @v5
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
+; CHECK-LABEL: {{^}}v5:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
define void @v5(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -64,8 +64,8 @@ entry:
ret void
}
-; CHECK-LABEL: @v6
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 6
+; CHECK-LABEL: {{^}}v6:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
define void @v6(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -76,8 +76,8 @@ entry:
ret void
}
-; CHECK-LABEL: @v7
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 9
+; CHECK-LABEL: {{^}}v7:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
define void @v7(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
index 24e8f640d90e..2c2905aaa0c0 100644
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 2
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 1
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 4
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: image_sample {{v[0-9]+}}, 2
+;CHECK-DAG: image_sample {{v[0-9]+}}, 1
+;CHECK-DAG: image_sample {{v[0-9]+}}, 4
+;CHECK-DAG: image_sample {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: image_sample {{v[0-9]+}}, 8
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
@@ -135,8 +135,8 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
ret void
}
-; CHECK: @v1
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
+; CHECK: {{^}}v1:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
define void @v1(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/R600/llvm.SI.sampled.ll
index 366456f44e6c..e42a48ee7852 100644
--- a/test/CodeGen/R600/llvm.SI.sampled.ll
+++ b/test/CodeGen/R600/llvm.SI.sampled.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 2
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 1
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 4
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 2
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 1
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 4
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll
index 581d422b0952..d94b137bc012 100644
--- a/test/CodeGen/R600/llvm.SI.sendmsg.ll
+++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll
@@ -1,10 +1,10 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-; CHECK-LABEL: @main
-; CHECK: S_SENDMSG Gs(emit stream 0)
-; CHECK: S_SENDMSG Gs(cut stream 1)
-; CHECK: S_SENDMSG Gs(emit-cut stream 2)
-; CHECK: S_SENDMSG Gs_done(nop)
+; CHECK-LABEL: {{^}}main:
+; CHECK: s_sendmsg Gs(emit stream 0)
+; CHECK: s_sendmsg Gs(cut stream 1)
+; CHECK: s_sendmsg Gs(emit-cut stream 2)
+; CHECK: s_sendmsg Gs_done(nop)
define void @main() {
main_body:
diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
index 740581a69666..320597ec8475 100644
--- a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
+++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK-LABEL: @test1
-;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test1:
+;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test1(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -10,8 +10,8 @@ define void @test1(i32 %a1, i32 %vaddr) #0 {
ret void
}
-;CHECK-LABEL: @test2
-;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test2:
+;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test2(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -20,8 +20,8 @@ define void @test2(i32 %a1, i32 %vaddr) #0 {
ret void
}
-;CHECK-LABEL: @test3
-;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test3:
+;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test3(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
@@ -30,8 +30,8 @@ define void @test3(i32 %a1, i32 %vaddr) #0 {
ret void
}
-;CHECK-LABEL: @test4
-;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test4:
+;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test4(i32 %vdata, i32 %vaddr) #0 {
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/R600/llvm.SI.tid.ll
index fe17304732ad..64efd2daf338 100644
--- a/test/CodeGen/R600/llvm.SI.tid.ll
+++ b/test/CodeGen/R600/llvm.SI.tid.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_MBCNT_LO_U32_B32_e64
-;CHECK: V_MBCNT_HI_U32_B32_e32
+;CHECK: v_mbcnt_lo_u32_b32_e64
+;CHECK: v_mbcnt_hi_u32_b32_e32
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
main_body:
diff --git a/test/CodeGen/R600/llvm.amdgpu.kilp.ll b/test/CodeGen/R600/llvm.amdgpu.kilp.ll
index 1b8b1bfd2089..cca42bf35393 100644
--- a/test/CodeGen/R600/llvm.amdgpu.kilp.ll
+++ b/test/CodeGen/R600/llvm.amdgpu.kilp.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @kilp_gs_const
-; SI: S_MOV_B64 exec, 0
+; SI-LABEL: {{^}}kilp_gs_const:
+; SI: s_mov_b64 exec, 0
define void @kilp_gs_const() #0 {
main_body:
%0 = icmp ule i32 0, 3
@@ -17,4 +17,4 @@ declare void @llvm.AMDGPU.kilp(float)
attributes #0 = { "ShaderType"="2" }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.amdgpu.lrp.ll b/test/CodeGen/R600/llvm.amdgpu.lrp.ll
index c493a016e330..a1b7bf0ec43c 100644
--- a/test/CodeGen/R600/llvm.amdgpu.lrp.ll
+++ b/test/CodeGen/R600/llvm.amdgpu.lrp.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone
-; FUNC-LABEL: @test_lrp
-; SI: V_SUB_F32
-; SI: V_MAD_F32
+; FUNC-LABEL: {{^}}test_lrp:
+; SI: v_sub_f32
+; SI: v_mad_f32
define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
%mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
store float %mad, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index 9e7a4deda69b..9be81807be21 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,5 +1,5 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
;FUNC-LABEL: test
;EG: MULADD_IEEE *
@@ -7,8 +7,8 @@
;EG: ADD *
;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;EG-NOT: COS
-;SI: V_COS_F32
-;SI-NOT: V_COS_F32
+;SI: v_cos_f32
+;SI-NOT: v_cos_f32
define void @test(float addrspace(1)* %out, float %x) #1 {
%cos = call float @llvm.cos.f32(float %x)
@@ -22,11 +22,11 @@ define void @test(float addrspace(1)* %out, float %x) #1 {
;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;EG-NOT: COS
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI-NOT: V_COS_F32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI-NOT: v_cos_f32
define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
%cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
diff --git a/test/CodeGen/R600/llvm.exp2.ll b/test/CodeGen/R600/llvm.exp2.ll
index 119d5ef49a5e..9e78134b084f 100644
--- a/test/CodeGen/R600/llvm.exp2.ll
+++ b/test/CodeGen/R600/llvm.exp2.ll
@@ -1,14 +1,14 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC
;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
-;FUNC-LABEL: @test
+;FUNC-LABEL: {{^}}test:
;EG-CHECK: EXP_IEEE
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
define void @test(float addrspace(1)* %out, float %in) {
entry:
@@ -17,7 +17,7 @@ entry:
ret void
}
-;FUNC-LABEL: @testv2
+;FUNC-LABEL: {{^}}testv2:
;EG-CHECK: EXP_IEEE
;EG-CHECK: EXP_IEEE
; FIXME: We should be able to merge these packets together on Cayman so we
@@ -30,8 +30,8 @@ entry:
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
@@ -40,7 +40,7 @@ entry:
ret void
}
-;FUNC-LABEL: @testv4
+;FUNC-LABEL: {{^}}testv4:
;EG-CHECK: EXP_IEEE
;EG-CHECK: EXP_IEEE
;EG-CHECK: EXP_IEEE
@@ -63,10 +63,10 @@ entry:
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.floor.ll b/test/CodeGen/R600/llvm.floor.ll
index f7071cd9b879..1016ff75ce9b 100644
--- a/test/CodeGen/R600/llvm.floor.ll
+++ b/test/CodeGen/R600/llvm.floor.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; R600-CHECK: @f32
+; R600-CHECK: {{^}}f32:
; R600-CHECK: FLOOR
-; SI-CHECK: @f32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: {{^}}f32:
+; SI-CHECK: v_floor_f32_e32
define void @f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.floor.f32(float %in)
@@ -12,12 +12,12 @@ entry:
ret void
}
-; R600-CHECK: @v2f32
+; R600-CHECK: {{^}}v2f32:
; R600-CHECK: FLOOR
; R600-CHECK: FLOOR
-; SI-CHECK: @v2f32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: {{^}}v2f32:
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.floor.v2f32(<2 x float> %in)
@@ -25,16 +25,16 @@ entry:
ret void
}
-; R600-CHECK: @v4f32
+; R600-CHECK: {{^}}v4f32:
; R600-CHECK: FLOOR
; R600-CHECK: FLOOR
; R600-CHECK: FLOOR
; R600-CHECK: FLOOR
-; SI-CHECK: @v4f32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: {{^}}v4f32:
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.log2.ll b/test/CodeGen/R600/llvm.log2.ll
index 4cba2d44a5c3..2373c6b036d2 100644
--- a/test/CodeGen/R600/llvm.log2.ll
+++ b/test/CodeGen/R600/llvm.log2.ll
@@ -1,14 +1,14 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC
;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
-;FUNC-LABEL: @test
+;FUNC-LABEL: {{^}}test:
;EG-CHECK: LOG_IEEE
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
define void @test(float addrspace(1)* %out, float %in) {
entry:
@@ -17,7 +17,7 @@ entry:
ret void
}
-;FUNC-LABEL: @testv2
+;FUNC-LABEL: {{^}}testv2:
;EG-CHECK: LOG_IEEE
;EG-CHECK: LOG_IEEE
; FIXME: We should be able to merge these packets together on Cayman so we
@@ -30,8 +30,8 @@ entry:
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
@@ -40,7 +40,7 @@ entry:
ret void
}
-;FUNC-LABEL: @testv4
+;FUNC-LABEL: {{^}}testv4:
;EG-CHECK: LOG_IEEE
;EG-CHECK: LOG_IEEE
;EG-CHECK: LOG_IEEE
@@ -63,10 +63,10 @@ entry:
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.memcpy.ll b/test/CodeGen/R600/llvm.memcpy.ll
new file mode 100644
index 000000000000..9771062ed0dd
--- /dev/null
+++ b/test/CodeGen/R600/llvm.memcpy.ll
@@ -0,0 +1,364 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+ %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+ call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+ %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+ call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+ %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+ call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
+ ret void
+}
+
+; FIXME: Use 64-bit ops
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+ %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+ call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+ %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+ %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+ %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+ %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+ %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll
index 3e2884b7ce02..2c926341f78a 100644
--- a/test/CodeGen/R600/llvm.rint.f64.ll
+++ b/test/CodeGen/R600/llvm.rint.f64.ll
@@ -1,15 +1,15 @@
-; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @rint_f64
-; CI: V_RNDNE_F64_e32
+; FUNC-LABEL: {{^}}rint_f64:
+; CI: v_rndne_f64_e32
-; SI-DAG: V_ADD_F64
-; SI-DAG: V_ADD_F64
-; SI-DAG V_CMP_GT_F64_e64
-; SI: V_CNDMASK_B32
-; SI: V_CNDMASK_B32
-; SI: S_ENDPGM
+; SI-DAG: v_add_f64
+; SI-DAG: v_add_f64
+; SI-DAG v_cmp_gt_f64_e64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: s_endpgm
define void @rint_f64(double addrspace(1)* %out, double %in) {
entry:
%0 = call double @llvm.rint.f64(double %in)
@@ -17,9 +17,9 @@ entry:
ret void
}
-; FUNC-LABEL: @rint_v2f64
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
+; FUNC-LABEL: {{^}}rint_v2f64:
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
entry:
%0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
@@ -27,11 +27,11 @@ entry:
ret void
}
-; FUNC-LABEL: @rint_v4f64
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
+; FUNC-LABEL: {{^}}rint_v4f64:
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
entry:
%0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll
index 209bb4358fd5..496cf07e7ca8 100644
--- a/test/CodeGen/R600/llvm.rint.ll
+++ b/test/CodeGen/R600/llvm.rint.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @rint_f32
+; FUNC-LABEL: {{^}}rint_f32:
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
define void @rint_f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.rint.f32(float %in) #0
@@ -12,12 +12,12 @@ entry:
ret void
}
-; FUNC-LABEL: @rint_v2f32
+; FUNC-LABEL: {{^}}rint_v2f32:
; R600: RNDNE
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
define void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
@@ -25,16 +25,16 @@ entry:
ret void
}
-; FUNC-LABEL: @rint_v4f32
+; FUNC-LABEL: {{^}}rint_v4f32:
; R600: RNDNE
; R600: RNDNE
; R600: RNDNE
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
define void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
@@ -42,10 +42,10 @@ entry:
ret void
}
-; FUNC-LABEL: @legacy_amdil_round_nearest_f32
+; FUNC-LABEL: {{^}}legacy_amdil_round_nearest_f32:
; R600: RNDNE
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
define void @legacy_amdil_round_nearest_f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.AMDIL.round.nearest.f32(float %in) #0
diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll
index e06d45d4a373..bedf4ba72ae4 100644
--- a/test/CodeGen/R600/llvm.round.ll
+++ b/test/CodeGen/R600/llvm.round.ll
@@ -1,11 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
-; FUNC-LABEL: @f32
-; R600: FRACT
-; R600-DAG: ADD
-; R600-DAG: CEIL
-; R600-DAG: FLOOR
-; R600: CNDGE
+; FUNC-LABEL: {{^}}f32:
+; R600: FRACT {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
+; R600-DAG: ADD {{.*}}, -0.5
+; R600-DAG: CEIL {{.*}} [[ARG]]
+; R600-DAG: FLOOR {{.*}} [[ARG]]
+; R600-DAG: CNDGE
+; R600-DAG: CNDGT
+; R600: CNDGE {{[^,]+}}, [[ARG]]
define void @f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.round.f32(float %in)
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 53006bad5c4b..d63d698b5554 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,53 +1,84 @@
-;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
-
-;FUNC-LABEL: test
-;EG: MULADD_IEEE *
-;EG: FRACT *
-;EG: ADD *
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: SIN
-;SI: V_MUL_F32
-;SI: V_FRACT_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @test(float addrspace(1)* %out, float %x) #1 {
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
+
+; FUNC-LABEL: sin_f32
+; EG: MULADD_IEEE *
+; EG: FRACT *
+; EG: ADD *
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG-NOT: SIN
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+
+define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
%sin = call float @llvm.sin.f32(float %x)
store float %sin, float addrspace(1)* %out
ret void
}
-;FUNC-LABEL: testf
-;SI-UNSAFE: 4.774
-;SI-UNSAFE: V_MUL_F32
-;SI-SAFE: V_MUL_F32
-;SI-SAFE: V_MUL_F32
-;SI: V_FRACT_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @testf(float addrspace(1)* %out, float %x) #1 {
- %y = fmul float 3.0, %x
+; FUNC-LABEL: {{^}}sin_3x_f32:
+; SI-UNSAFE-NOT: v_add_f32
+; SI-UNSAFE: 0x3ef47644
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_mul_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+ %y = fmul float 3.0, %x
+ %sin = call float @llvm.sin.f32(float %y)
+ store float %sin, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sin_2x_f32:
+; SI-UNSAFE-NOT: v_add_f32
+; SI-UNSAFE: 0x3ea2f983
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_add_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+ %y = fmul float 2.0, %x
+ %sin = call float @llvm.sin.f32(float %y)
+ store float %sin, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_2sin_f32:
+; SI-UNSAFE: 0x3ea2f983
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_add_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
+ %y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, float addrspace(1)* %out
ret void
}
-;FUNC-LABEL: testv
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: SIN
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+; FUNC-LABEL: {{^}}sin_v4f32:
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG-NOT: SIN
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+
+define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
store <4 x float> %sin, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll
index 4eee37ffbe21..c0392256c202 100644
--- a/test/CodeGen/R600/llvm.sqrt.ll
+++ b/test/CodeGen/R600/llvm.sqrt.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK
-; R600-CHECK-LABEL: @sqrt_f32
+; R600-CHECK-LABEL: {{^}}sqrt_f32:
; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
; R600-CHECK: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS
-; SI-CHECK-LABEL: @sqrt_f32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK-LABEL: {{^}}sqrt_f32:
+; SI-CHECK: v_sqrt_f32_e32
define void @sqrt_f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.sqrt.f32(float %in)
@@ -13,14 +13,14 @@ entry:
ret void
}
-; R600-CHECK-LABEL: @sqrt_v2f32
+; R600-CHECK-LABEL: {{^}}sqrt_v2f32:
; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W
; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS
; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X
; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS
-; SI-CHECK-LABEL: @sqrt_v2f32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK-LABEL: {{^}}sqrt_v2f32:
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
@@ -28,7 +28,7 @@ entry:
ret void
}
-; R600-CHECK-LABEL: @sqrt_v4f32
+; R600-CHECK-LABEL: {{^}}sqrt_v4f32:
; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y
; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS
; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -37,11 +37,11 @@ entry:
; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS
; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X
; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS
-; SI-CHECK-LABEL: @sqrt_v4f32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK-LABEL: {{^}}sqrt_v4f32:
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.trunc.ll b/test/CodeGen/R600/llvm.trunc.ll
index fa6fb9906dde..5585477ef294 100644
--- a/test/CodeGen/R600/llvm.trunc.ll
+++ b/test/CodeGen/R600/llvm.trunc.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK-LABEL: @trunc_f32
+; CHECK-LABEL: {{^}}trunc_f32:
; CHECK: TRUNC
define void @trunc_f32(float addrspace(1)* %out, float %in) {
diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll
index 9ba81b85f59b..85ec5eb9ec8a 100644
--- a/test/CodeGen/R600/load-i1.ll
+++ b/test/CodeGen/R600/load-i1.ll
@@ -1,21 +1,57 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FUNC-LABEL: {{^}}global_copy_i1_to_i1:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1
+; SI: buffer_store_byte
+; SI: s_endpgm
-; SI-LABEL: @global_copy_i1_to_i1
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_AND_B32_e32 v{{[0-9]+}}, 1
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; EG: VTX_READ_8
+; EG: AND_INT
define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
store i1 %load, i1 addrspace(1)* %out, align 1
ret void
}
-; SI-LABEL: @global_sextload_i1_to_i32
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}local_copy_i1_to_i1:
+; SI: ds_read_u8
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1
+; SI: ds_write_b8
+; SI: s_endpgm
+
+; EG: LDS_UBYTE_READ_RET
+; EG: AND_INT
+; EG: LDS_BYTE_WRITE
+define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
+ %load = load i1 addrspace(3)* %in
+ store i1 %load, i1 addrspace(3)* %out, align 1
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_copy_i1_to_i1:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1
+; SI: buffer_store_byte
+; SI: s_endpgm
+
+; EG: VTX_READ_8
+; EG: AND_INT
+define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
+ %load = load i1 addrspace(2)* %in
+ store i1 %load, i1 addrspace(1)* %out, align 1
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: buffer_store_dword
+; SI: s_endpgm
+
+; EG: VTX_READ_8
+; EG: BFE_INT
define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = sext i1 %load to i32
@@ -23,10 +59,11 @@ define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
ret void
}
-; SI-LABEL: @global_zextload_i1_to_i32
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+
define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = zext i1 %load to i32
@@ -34,10 +71,11 @@ define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
ret void
}
-; SI-LABEL: @global_sextload_i1_to_i64
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = sext i1 %load to i64
@@ -45,10 +83,11 @@ define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
ret void
}
-; SI-LABEL: @global_zextload_i1_to_i64
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
+; SI: buffer_load_ubyte
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%load = load i1 addrspace(1)* %in
%ext = zext i1 %load to i64
@@ -56,50 +95,52 @@ define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
ret void
}
-; SI-LABEL: @i1_arg
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_AND_B32_e32
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32
+; SI: buffer_store_byte
+; SI: s_endpgm
define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
store i1 %x, i1 addrspace(1)* %out, align 1
ret void
}
-; SI-LABEL: @i1_arg_zext_i32
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_zext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
%ext = zext i1 %x to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: @i1_arg_zext_i64
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_zext_i64:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
%ext = zext i1 %x to i64
store i64 %ext, i64 addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @i1_arg_sext_i32
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_sext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
%ext = sext i1 %x to i32
store i32 %ext, i32addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: @i1_arg_sext_i64
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_sext_i64:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: v_ashrrev_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
%ext = sext i1 %x to i64
store i64 %ext, i64 addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll
index ca86d0e36907..265fa9bfeb42 100644
--- a/test/CodeGen/R600/load-input-fold.ll
+++ b/test/CodeGen/R600/load-input-fold.ll
@@ -1,5 +1,4 @@
;RUN: llc < %s -march=r600 -mcpu=cayman
-;REQUIRES: asserts
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
main_body:
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index 8905fbd3aeb6..5d043b423871 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -1,16 +1,16 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;
; Load an i8 value from the global address space.
-; FUNC-LABEL: @load_i8
+; FUNC-LABEL: {{^}}load_i8:
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
%1 = load i8 addrspace(1)* %in
%2 = zext i8 %1 to i32
@@ -18,13 +18,13 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @load_i8_sext
+; FUNC-LABEL: {{^}}load_i8_sext:
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = load i8 addrspace(1)* %in
@@ -33,11 +33,11 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i8
+; FUNC-LABEL: {{^}}load_v2i8:
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
%0 = load <2 x i8> addrspace(1)* %in
@@ -46,7 +46,7 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i8_sext
+; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
@@ -57,8 +57,8 @@ entry:
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
%0 = load <2 x i8> addrspace(1)* %in
@@ -67,15 +67,15 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i8
+; FUNC-LABEL: {{^}}load_v4i8:
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
%0 = load <4 x i8> addrspace(1)* %in
@@ -84,7 +84,7 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i8_sext
+; FUNC-LABEL: {{^}}load_v4i8_sext:
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
@@ -105,10 +105,10 @@ entry:
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
%0 = load <4 x i8> addrspace(1)* %in
@@ -118,9 +118,9 @@ entry:
}
; Load an i16 value from the global address space.
-; FUNC-LABEL: @load_i16
+; FUNC-LABEL: {{^}}load_i16:
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
%0 = load i16 addrspace(1)* %in
@@ -129,13 +129,13 @@ entry:
ret void
}
-; FUNC-LABEL: @load_i16_sext
+; FUNC-LABEL: {{^}}load_i16_sext:
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
%0 = load i16 addrspace(1)* %in
@@ -144,11 +144,11 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i16
+; FUNC-LABEL: {{^}}load_v2i16:
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
%0 = load <2 x i16> addrspace(1)* %in
@@ -157,7 +157,7 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i16_sext
+; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
@@ -168,8 +168,8 @@ entry:
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
%0 = load <2 x i16> addrspace(1)* %in
@@ -178,15 +178,15 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i16
+; FUNC-LABEL: {{^}}load_v4i16:
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
%0 = load <4 x i16> addrspace(1)* %in
@@ -195,7 +195,7 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i16_sext
+; FUNC-LABEL: {{^}}load_v4i16_sext:
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
@@ -216,10 +216,10 @@ entry:
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
%0 = load <4 x i16> addrspace(1)* %in
@@ -229,10 +229,10 @@ entry:
}
; load an i32 value from the global address space.
-; FUNC-LABEL: @load_i32
+; FUNC-LABEL: {{^}}load_i32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+; SI-CHECK: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = load i32 addrspace(1)* %in
@@ -241,10 +241,10 @@ entry:
}
; load a f32 value from the global address space.
-; FUNC-LABEL: @load_f32
+; FUNC-LABEL: {{^}}load_f32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+; SI-CHECK: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
%0 = load float addrspace(1)* %in
@@ -253,10 +253,10 @@ entry:
}
; load a v2f32 value from the global address space
-; FUNC-LABEL: @load_v2f32
+; FUNC-LABEL: {{^}}load_v2f32:
; R600-CHECK: MEM_RAT
; R600-CHECK: VTX_READ_64
-; SI-CHECK: BUFFER_LOAD_DWORDX2
+; SI-CHECK: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
%0 = load <2 x float> addrspace(1)* %in
@@ -264,9 +264,9 @@ entry:
ret void
}
-; FUNC-LABEL: @load_i64
+; FUNC-LABEL: {{^}}load_i64:
; R600-CHECK: VTX_READ_64
-; SI-CHECK: BUFFER_LOAD_DWORDX2
+; SI-CHECK: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
%0 = load i64 addrspace(1)* %in
@@ -274,12 +274,12 @@ entry:
ret void
}
-; FUNC-LABEL: @load_i64_sext
+; FUNC-LABEL: {{^}}load_i64_sext:
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600-CHECK: 31
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
@@ -289,7 +289,7 @@ entry:
ret void
}
-; FUNC-LABEL: @load_i64_zext
+; FUNC-LABEL: {{^}}load_i64_zext:
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -300,18 +300,18 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v8i32
+; FUNC-LABEL: {{^}}load_v8i32:
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
%0 = load <8 x i32> addrspace(1)* %in
@@ -319,28 +319,28 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v16i32
+; FUNC-LABEL: {{^}}load_v16i32:
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
%0 = load <16 x i32> addrspace(1)* %in
@@ -353,13 +353,13 @@ entry:
;===------------------------------------------------------------------------===;
; Load a sign-extended i8 value
-; FUNC-LABEL: @load_const_i8_sext
+; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
%0 = load i8 addrspace(2)* %in
@@ -369,9 +369,9 @@ entry:
}
; Load an aligned i8 value
-; FUNC-LABEL: @load_const_i8_aligned
+; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
%0 = load i8 addrspace(2)* %in
@@ -381,9 +381,9 @@ entry:
}
; Load an un-aligned i8 value
-; FUNC-LABEL: @load_const_i8_unaligned
+; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
%0 = getelementptr i8 addrspace(2)* %in, i32 1
@@ -394,13 +394,13 @@ entry:
}
; Load a sign-extended i16 value
-; FUNC-LABEL: @load_const_i16_sext
+; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
%0 = load i16 addrspace(2)* %in
@@ -410,9 +410,9 @@ entry:
}
; Load an aligned i16 value
-; FUNC-LABEL: @load_const_i16_aligned
+; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
%0 = load i16 addrspace(2)* %in
@@ -422,9 +422,9 @@ entry:
}
; Load an un-aligned i16 value
-; FUNC-LABEL: @load_const_i16_unaligned
+; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
%0 = getelementptr i16 addrspace(2)* %in, i32 1
@@ -435,10 +435,10 @@ entry:
}
; Load an i32 value from the constant address space.
-; FUNC-LABEL: @load_const_addrspace_i32
+; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
+; SI-CHECK: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
%0 = load i32 addrspace(2)* %in
@@ -447,10 +447,10 @@ entry:
}
; Load a f32 value from the constant address space.
-; FUNC-LABEL: @load_const_addrspace_f32
+; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
+; SI-CHECK: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
%1 = load float addrspace(2)* %in
store float %1, float addrspace(1)* %out
@@ -462,11 +462,11 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
;===------------------------------------------------------------------------===;
; Load an i8 value from the local address space.
-; FUNC-LABEL: @load_i8_local
+; FUNC-LABEL: {{^}}load_i8_local:
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
%1 = load i8 addrspace(3)* %in
%2 = zext i8 %1 to i32
@@ -474,12 +474,12 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
ret void
}
-; FUNC-LABEL: @load_i8_sext_local
+; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
%0 = load i8 addrspace(3)* %in
@@ -488,13 +488,13 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i8_local
+; FUNC-LABEL: {{^}}load_v2i8_local:
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
%0 = load <2 x i8> addrspace(3)* %in
@@ -503,15 +503,15 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i8_sext_local
+; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
%0 = load <2 x i8> addrspace(3)* %in
@@ -520,17 +520,17 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i8_local
+; FUNC-LABEL: {{^}}load_v4i8_local:
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
%0 = load <4 x i8> addrspace(3)* %in
@@ -539,7 +539,7 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i8_sext_local
+; FUNC-LABEL: {{^}}load_v4i8_sext_local:
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
@@ -548,12 +548,12 @@ entry:
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
%0 = load <4 x i8> addrspace(3)* %in
@@ -563,11 +563,11 @@ entry:
}
; Load an i16 value from the local address space.
-; FUNC-LABEL: @load_i16_local
+; FUNC-LABEL: {{^}}load_i16_local:
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
%0 = load i16 addrspace(3)* %in
@@ -576,12 +576,12 @@ entry:
ret void
}
-; FUNC-LABEL: @load_i16_sext_local
+; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
%0 = load i16 addrspace(3)* %in
@@ -590,13 +590,13 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i16_local
+; FUNC-LABEL: {{^}}load_v2i16_local:
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
%0 = load <2 x i16> addrspace(3)* %in
@@ -605,15 +605,15 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v2i16_sext_local
+; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
%0 = load <2 x i16> addrspace(3)* %in
@@ -622,17 +622,17 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i16_local
+; FUNC-LABEL: {{^}}load_v4i16_local:
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
%0 = load <4 x i16> addrspace(3)* %in
@@ -641,7 +641,7 @@ entry:
ret void
}
-; FUNC-LABEL: @load_v4i16_sext_local
+; FUNC-LABEL: {{^}}load_v4i16_sext_local:
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
@@ -650,12 +650,12 @@ entry:
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
%0 = load <4 x i16> addrspace(3)* %in
@@ -665,11 +665,11 @@ entry:
}
; load an i32 value from the local address space.
-; FUNC-LABEL: @load_i32_local
+; FUNC-LABEL: {{^}}load_i32_local:
; R600-CHECK: LDS_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = load i32 addrspace(3)* %in
@@ -678,10 +678,10 @@ entry:
}
; load a f32 value from the local address space.
-; FUNC-LABEL: @load_f32_local
+; FUNC-LABEL: {{^}}load_f32_local:
; R600-CHECK: LDS_READ_RET
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
%0 = load float addrspace(3)* %in
@@ -690,14 +690,50 @@ entry:
}
; load a v2f32 value from the local address space
-; FUNC-LABEL: @load_v2f32_local
+; FUNC-LABEL: {{^}}load_v2f32_local:
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
%0 = load <2 x float> addrspace(3)* %in
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}
+
+; Test loading a i32 and v2i32 value from the same base pointer.
+; FUNC-LABEL: {{^}}load_i32_v2i32_local:
+; R600-CHECK: LDS_READ_RET
+; R600-CHECK: LDS_READ_RET
+; R600-CHECK: LDS_READ_RET
+; SI-CHECK-DAG: ds_read_b32
+; SI-CHECK-DAG: ds_read2_b32
+define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
+ %scalar = load i32 addrspace(3)* %in
+ %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
+ %vec_ptr = getelementptr <2 x i32> addrspace(3)* %tmp0, i32 2
+ %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
+ %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
+ %vec = add <2 x i32> %vec0, %vec1
+ store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+
+@lds = addrspace(3) global [512 x i32] undef, align 4
+
+; On SI we need to make sure that the base offset is a register and not
+; an immediate.
+; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
+; SI-CHECK: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
+; SI-CHECK: ds_read_b32 v0, v[[ZERO]] offset:4
+; R600-CHECK: LDS_READ_RET
+define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+ %tmp0 = getelementptr [512 x i32] addrspace(3)* @lds, i32 0, i32 1
+ %tmp1 = load i32 addrspace(3)* %tmp0
+ %tmp2 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 %tmp1, i32 addrspace(1)* %tmp2
+ ret void
+}
diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll
index 81a6310bbcc9..bdd35d8f2f35 100644
--- a/test/CodeGen/R600/load.vec.ll
+++ b/test/CodeGen/R600/load.vec.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
; load a v2i32 value from the global address space.
-; EG-CHECK: @load_v2i32
+; EG-CHECK: {{^}}load_v2i32:
; EG-CHECK: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_v2i32
-; SI-CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: {{^}}load_v2i32:
+; SI-CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%a = load <2 x i32> addrspace(1) * %in
store <2 x i32> %a, <2 x i32> addrspace(1)* %out
@@ -13,10 +13,10 @@ define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
}
; load a v4i32 value from the global address space.
-; EG-CHECK: @load_v4i32
+; EG-CHECK: {{^}}load_v4i32:
; EG-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_v4i32
-; SI-CHECK: BUFFER_LOAD_DWORDX4 v[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: {{^}}load_v4i32:
+; SI-CHECK: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%a = load <4 x i32> addrspace(1) * %in
store <4 x i32> %a, <4 x i32> addrspace(1)* %out
diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll
index a117557e98e0..b32d2d56267e 100644
--- a/test/CodeGen/R600/load64.ll
+++ b/test/CodeGen/R600/load64.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; load a f64 value from the global address space.
-; CHECK-LABEL: @load_f64:
-; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK-LABEL: {{^}}load_f64:
+; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%1 = load double addrspace(1)* %in
store double %1, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: @load_i64:
-; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK-LABEL: {{^}}load_i64:
+; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tmp = load i64 addrspace(1)* %in
store i64 %tmp, i64 addrspace(1)* %out, align 8
@@ -20,9 +20,9 @@ define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
}
; Load a f64 value from the constant address space.
-; CHECK-LABEL: @load_const_addrspace_f64:
-; CHECK: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK-LABEL: {{^}}load_const_addrspace_f64:
+; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
%1 = load double addrspace(2)* %in
store double %1, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll
index c52b41bb1b5a..f975bc1f56b0 100644
--- a/test/CodeGen/R600/local-64.ll
+++ b/test/CodeGen/R600/local-64.ll
@@ -1,8 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
-; SI-LABEL: @local_i32_load
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
-; SI: BUFFER_STORE_DWORD [[REG]],
+; BOTH-LABEL: {{^}}local_i32_load
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
+; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%gep = getelementptr i32 addrspace(3)* %in, i32 7
%val = load i32 addrspace(3)* %gep, align 4
@@ -10,19 +11,19 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw
ret void
}
-; SI-LABEL: @local_i32_load_0_offset
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
-; SI: BUFFER_STORE_DWORD [[REG]],
+; BOTH-LABEL: {{^}}local_i32_load_0_offset
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%val = load i32 addrspace(3)* %in, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: @local_i8_load_i16_max_offset
-; SI-NOT: ADD
-; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
-; SI: BUFFER_STORE_BYTE [[REG]],
+; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
+; BOTH-NOT: ADD
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
+; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65535
%val = load i8 addrspace(3)* %gep, align 4
@@ -30,11 +31,14 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
ret void
}
-; SI-LABEL: @local_i8_load_over_i16_max_offset
-; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
-; SI: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
-; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
-; SI: BUFFER_STORE_BYTE [[REG]],
+; BOTH-LABEL: {{^}}local_i8_load_over_i16_max_offset:
+; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
+; SI, which is why it is being OR'd with the base pointer.
+; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
+; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
+; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
+; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65536
%val = load i8 addrspace(3)* %gep, align 4
@@ -42,10 +46,10 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa
ret void
}
-; SI-LABEL: @local_i64_load
-; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_i64_load:
+; BOTH-NOT: ADD
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%gep = getelementptr i64 addrspace(3)* %in, i32 7
%val = load i64 addrspace(3)* %gep, align 8
@@ -53,19 +57,19 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw
ret void
}
-; SI-LABEL: @local_i64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_i64_load_0_offset
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%val = load i64 addrspace(3)* %in, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @local_f64_load
-; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_f64_load:
+; BOTH-NOT: ADD
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%gep = getelementptr double addrspace(3)* %in, i32 7
%val = load double addrspace(3)* %gep, align 8
@@ -73,85 +77,89 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in)
ret void
}
-; SI-LABEL: @local_f64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_f64_load_0_offset
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%val = load double addrspace(3)* %in, align 8
store double %val, double addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @local_i64_store
-; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
+; BOTH-LABEL: {{^}}local_i64_store:
+; BOTH-NOT: ADD
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
%gep = getelementptr i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
ret void
}
-; SI-LABEL: @local_i64_store_0_offset
-; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_i64_store_0_offset:
+; BOTH-NOT: ADD
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
store i64 1234, i64 addrspace(3)* %out, align 8
ret void
}
-; SI-LABEL: @local_f64_store
-; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
+; BOTH-LABEL: {{^}}local_f64_store:
+; BOTH-NOT: ADD
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
define void @local_f64_store(double addrspace(3)* %out) nounwind {
%gep = getelementptr double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
ret void
}
-; SI-LABEL: @local_f64_store_0_offset
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_f64_store_0_offset
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
store double 20.0, double addrspace(3)* %out, align 8
ret void
}
-; SI-LABEL: @local_v2i64_store
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
+; BOTH-LABEL: {{^}}local_v2i64_store:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
+; BOTH: s_endpgm
define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
ret void
}
-; SI-LABEL: @local_v2i64_store_0_offset
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
+; BOTH: s_endpgm
define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
ret void
}
-; SI-LABEL: @local_v4i64_store
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
+; BOTH-LABEL: {{^}}local_v4i64_store:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
+; BOTH: s_endpgm
define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
ret void
}
-; SI-LABEL: @local_v4i64_store_0_offset
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
+; BOTH: s_endpgm
define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll
index 5a44951055ea..16d3173f3692 100644
--- a/test/CodeGen/R600/local-atomics.ll
+++ b/test/CodeGen/R600/local-atomics.ll
@@ -1,21 +1,25 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
+; EG: LDS_WRXCHG_RET *
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_xchg_ret_i32_offset:
-; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
+; EG: LDS_WRXCHG_RET *
+; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -24,22 +28,24 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
}
; XXX - Is it really necessary to load 4 into VGPR?
-; FUNC-LABEL: @lds_atomic_add_ret_i32:
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
+; EG: LDS_ADD_RET *
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_add_ret_i32_offset:
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
+; EG: LDS_ADD_RET *
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -47,22 +53,36 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_inc_ret_i32:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
+; EG: LDS_ADD_RET *
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+ %sub = sub i32 %a, %b
+ %add = add i32 %sub, 4
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
+; EG: LDS_ADD_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_inc_ret_i32_offset:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
+; EG: LDS_ADD_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
@@ -70,18 +90,34 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_sub_ret_i32:
-; SI: DS_SUB_RTN_U32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
+; EG: LDS_ADD_RET *
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+ %sub = sub i32 %a, %b
+ %add = add i32 %sub, 4
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
+; EG: LDS_SUB_RET *
+; SI: ds_sub_rtn_u32
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_sub_ret_i32_offset:
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
+; EG: LDS_SUB_RET *
+; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -89,22 +125,22 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_dec_ret_i32:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
+; EG: LDS_SUB_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_dec_ret_i32_offset:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
+; EG: LDS_SUB_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
@@ -112,18 +148,20 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_and_ret_i32:
-; SI: DS_AND_RTN_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
+; EG: LDS_AND_RET *
+; SI: ds_and_rtn_b32
+; SI: s_endpgm
define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_and_ret_i32_offset:
-; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
+; EG: LDS_AND_RET *
+; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -131,18 +169,20 @@ define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_or_ret_i32:
-; SI: DS_OR_RTN_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
+; EG: LDS_OR_RET *
+; SI: ds_or_rtn_b32
+; SI: s_endpgm
define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_or_ret_i32_offset:
-; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
+; EG: LDS_OR_RET *
+; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -150,18 +190,20 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(
ret void
}
-; FUNC-LABEL: @lds_atomic_xor_ret_i32:
-; SI: DS_XOR_RTN_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
+; EG: LDS_XOR_RET *
+; SI: ds_xor_rtn_b32
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_xor_ret_i32_offset:
-; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
+; EG: LDS_XOR_RET *
+; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -170,25 +212,27 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
}
; FIXME: There is no atomic nand instr
-; XFUNC-LABEL: @lds_atomic_nand_ret_i32:uction, so we somehow need to expand this.
+; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:uction, so we somehow need to expand this.
; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
; store i32 %result, i32 addrspace(1)* %out, align 4
; ret void
; }
-; FUNC-LABEL: @lds_atomic_min_ret_i32:
-; SI: DS_MIN_RTN_I32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
+; EG: LDS_MIN_INT_RET *
+; SI: ds_min_rtn_i32
+; SI: s_endpgm
define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_min_ret_i32_offset:
-; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
+; EG: LDS_MIN_INT_RET *
+; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -196,18 +240,20 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_max_ret_i32:
-; SI: DS_MAX_RTN_I32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
+; EG: LDS_MAX_INT_RET *
+; SI: ds_max_rtn_i32
+; SI: s_endpgm
define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_max_ret_i32_offset:
-; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
+; EG: LDS_MAX_INT_RET *
+; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -215,18 +261,20 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_umin_ret_i32:
-; SI: DS_MIN_RTN_U32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
+; EG: LDS_MIN_UINT_RET *
+; SI: ds_min_rtn_u32
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_umin_ret_i32_offset:
-; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
+; EG: LDS_MIN_UINT_RET *
+; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -234,21 +282,269 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
ret void
}
-; FUNC-LABEL: @lds_atomic_umax_ret_i32:
-; SI: DS_MAX_RTN_U32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
+; EG: LDS_MAX_UINT_RET *
+; SI: ds_max_rtn_u32
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @lds_atomic_umax_ret_i32_offset:
-; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
+; EG: LDS_MAX_UINT_RET *
+; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
+; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; XXX - Is it really necessary to load 4 into VGPR?
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0]
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+ %sub = sub i32 %a, %b
+ %add = add i32 %sub, 4
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
+; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
+; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+ %sub = sub i32 %a, %b
+ %add = add i32 %sub, 4
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
+; SI: ds_sub_u32
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
+; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
+; SI: ds_and_b32
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
+; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
+; SI: ds_or_b32
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
+; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
+; SI: ds_xor_b32
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
+; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FIXME: There is no atomic nand instr
+; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:uction, so we somehow need to expand this.
+; define void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
+; SI: ds_min_i32
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
+; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
+; SI: ds_max_i32
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
+; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
+; SI: ds_min_u32
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
+; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
+; SI: ds_max_u32
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
+; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll
index 849b033d84a9..ce6ddbd66265 100644
--- a/test/CodeGen/R600/local-atomics64.ll
+++ b/test/CodeGen/R600/local-atomics64.ll
@@ -1,17 +1,17 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
-; FUNC-LABEL: @lds_atomic_xchg_ret_i64:
-; SI: DS_WRXCHG_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
+; SI: ds_wrxchg_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_xchg_ret_i64_offset:
-; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
+; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -19,24 +19,23 @@ define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
ret void
}
-; FUNC-LABEL: @lds_atomic_add_ret_i64:
-; SI: DS_ADD_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
+; SI: ds_add_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_add_ret_i64_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
@@ -44,22 +43,21 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_inc_ret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_inc_ret_i64_offset:
-; SI: DS_INC_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
+; SI: ds_inc_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
@@ -67,18 +65,18 @@ define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_sub_ret_i64:
-; SI: DS_SUB_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
+; SI: ds_sub_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_sub_ret_i64_offset:
-; SI: DS_SUB_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
+; SI: ds_sub_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -86,22 +84,21 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_dec_ret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_dec_ret_i64_offset:
-; SI: DS_DEC_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
+; SI: ds_dec_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
@@ -109,18 +106,18 @@ define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_and_ret_i64:
-; SI: DS_AND_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
+; SI: ds_and_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_and_ret_i64_offset:
-; SI: DS_AND_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
+; SI: ds_and_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -128,18 +125,18 @@ define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_or_ret_i64:
-; SI: DS_OR_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
+; SI: ds_or_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_or_ret_i64_offset:
-; SI: DS_OR_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
+; SI: ds_or_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -147,18 +144,18 @@ define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(
ret void
}
-; FUNC-LABEL: @lds_atomic_xor_ret_i64:
-; SI: DS_XOR_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
+; SI: ds_xor_rtn_b64
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_xor_ret_i64_offset:
-; SI: DS_XOR_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
+; SI: ds_xor_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -167,25 +164,25 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
}
; FIXME: There is no atomic nand instr
-; XFUNC-LABEL: @lds_atomic_nand_ret_i64:uction, so we somehow need to expand this.
+; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i64:uction, so we somehow need to expand this.
; define void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
; %result = atomicrmw nand i64 addrspace(3)* %ptr, i32 4 seq_cst
; store i64 %result, i64 addrspace(1)* %out, align 8
; ret void
; }
-; FUNC-LABEL: @lds_atomic_min_ret_i64:
-; SI: DS_MIN_RTN_I64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
+; SI: ds_min_rtn_i64
+; SI: s_endpgm
define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_min_ret_i64_offset:
-; SI: DS_MIN_RTN_I64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
+; SI: ds_min_rtn_i64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -193,18 +190,18 @@ define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_max_ret_i64:
-; SI: DS_MAX_RTN_I64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
+; SI: ds_max_rtn_i64
+; SI: s_endpgm
define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_max_ret_i64_offset:
-; SI: DS_MAX_RTN_I64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
+; SI: ds_max_rtn_i64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -212,18 +209,18 @@ define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
ret void
}
-; FUNC-LABEL: @lds_atomic_umin_ret_i64:
-; SI: DS_MIN_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
+; SI: ds_min_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_umin_ret_i64_offset:
-; SI: DS_MIN_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
+; SI: ds_min_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -231,21 +228,240 @@ define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
ret void
}
-; FUNC-LABEL: @lds_atomic_umax_ret_i64:
-; SI: DS_MAX_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
+; SI: ds_max_rtn_u64
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @lds_atomic_umax_ret_i64_offset:
-; SI: DS_MAX_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
+; SI: ds_max_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%gep = getelementptr i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
+; SI: ds_wrxchg_rtn_b64
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
+; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
+; SI: ds_add_u64
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
+ %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
+; SI: ds_inc_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
+; SI: ds_sub_u64
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
+; SI: ds_sub_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
+; SI: ds_dec_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
+; SI: ds_and_b64
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
+; SI: ds_and_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
+; SI: ds_or_b64
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
+; SI: ds_or_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
+; SI: ds_xor_b64
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
+; SI: ds_xor_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FIXME: There is no atomic nand instr
+; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i64:uction, so we somehow need to expand this.
+; define void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+; %result = atomicrmw nand i64 addrspace(3)* %ptr, i32 4 seq_cst
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
+; SI: ds_min_i64
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
+; SI: ds_min_i64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
+; SI: ds_max_i64
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
+; SI: ds_max_i64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
+; SI: ds_min_u64
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
+; SI: ds_min_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
+; SI: ds_max_u64
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
+; SI: ds_max_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
+ ret void
+}
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index e29e4cc88fd9..5c77ad5e6ca1 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -1,24 +1,25 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=CI %s
-@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
-@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
+@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
+@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
-; EG-CHECK: @local_memory_two_objects
+; EG-CHECK: {{^}}local_memory_two_objects:
; Check that the LDS size emitted correctly
; EG-CHECK: .long 166120
; EG-CHECK-NEXT: .long 8
; SI-CHECK: .long 47180
-; SI-CHECK-NEXT: .long 32768
+; SI-CHECK-NEXT: .long 38792
; We would like to check the the lds writes are using different
; addresses, but due to variations in the scheduler, we can't do
; this consistently on evergreen GPUs.
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
-; SI-CHECK: DS_WRITE_B32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
-; SI-CHECK-NOT: DS_WRITE_B32 {{v[0-9]*}}, v[[ADDRW]]
+; SI-CHECK: ds_write_b32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
+; SI-CHECK-NOT: ds_write_b32 {{v[0-9]*}}, v[[ADDRW]]
; GROUP_BARRIER must be the last instruction in a clause
; EG-CHECK: GROUP_BARRIER
@@ -28,8 +29,10 @@
; constant offsets.
; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
+; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0]
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0]
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] [M0]
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll
index 51af4844cb5a..68e72c556f66 100644
--- a/test/CodeGen/R600/local-memory.ll
+++ b/test/CodeGen/R600/local-memory.ll
@@ -1,32 +1,30 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] zeroinitializer, align 4
+@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
-; EG-CHECK-LABEL: @local_memory
-; SI-CHECK-LABEL: @local_memory
-; CI-CHECK-LABEL: @local_memory
+; FUNC-LABEL: {{^}}local_memory:
; Check that the LDS size emitted correctly
-; EG-CHECK: .long 166120
-; EG-CHECK-NEXT: .long 128
-; SI-CHECK: .long 47180
-; SI-CHECK-NEXT: .long 65536
-; CI-CHECK: .long 47180
-; CI-CHECK-NEXT: .long 32768
+; EG: .long 166120
+; EG-NEXT: .long 128
+; SI: .long 47180
+; SI-NEXT: .long 71560
+; CI: .long 47180
+; CI-NEXT: .long 38792
-; EG-CHECK: LDS_WRITE
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: DS_WRITE_B32
+; EG: LDS_WRITE
+; SI-NOT: s_wqm_b64
+; SI: ds_write_b32
; GROUP_BARRIER must be the last instruction in a clause
-; EG-CHECK: GROUP_BARRIER
-; EG-CHECK-NEXT: ALU clause
-; SI-CHECK: S_BARRIER
+; EG: GROUP_BARRIER
+; EG-NEXT: ALU clause
+; SI: s_barrier
-; EG-CHECK: LDS_READ_RET
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}},
+; EG: LDS_READ_RET
+; SI: ds_read_b32 {{v[0-9]+}},
define void @local_memory(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
index b46d8e9dfb04..03e0f011fffc 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/R600/loop-address.ll
@@ -31,7 +31,7 @@ attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pi
!opencl.kernels = !{!0, !1, !2, !3}
-!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge}
-!1 = metadata !{null}
-!2 = metadata !{null}
-!3 = metadata !{null}
+!0 = !{void (i32 addrspace(1)*, i32)* @loop_ge}
+!1 = !{null}
+!2 = !{null}
+!3 = !{null}
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll
index 128f661077ea..847a34bc4cda 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/R600/loop-idiom.ll
@@ -1,5 +1,5 @@
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--"
@@ -10,8 +10,8 @@ target triple = "r600--"
; implementations of these for R600.
; FUNC: @no_memcpy
-; R600-NOT: @llvm.memcpy
-; SI-NOT: @llvm.memcpy
+; R600-NOT: {{^}}llvm.memcpy
+; SI-NOT: {{^}}llvm.memcpy
define void @no_memcpy(i8 addrspace(3)* %in, i32 %size) {
entry:
%dest = alloca i8, i32 32
@@ -32,10 +32,10 @@ for.end:
}
; FUNC: @no_memset
-; R600-NOT: @llvm.memset
-; R600-NOT: @memset_pattern16
-; SI-NOT: @llvm.memset
-; SI-NOT: @memset_pattern16
+; R600-NOT: {{^}}llvm.memset
+; R600-NOT: {{^}}memset_pattern16:
+; SI-NOT: {{^}}llvm.memset
+; SI-NOT: {{^}}memset_pattern16:
define void @no_memset(i32 %size) {
entry:
%dest = alloca i8, i32 32
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
index 216283910009..66f7cbf1d5a6 100644
--- a/test/CodeGen/R600/lshl.ll
+++ b/test/CodeGen/R600/lshl.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: S_LSHL_B32 s{{[0-9]}}, s{{[0-9]}}, 1
+;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
define void @test(i32 %p) {
%i = mul i32 %p, 2
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
index 886d1c4854df..ff6e6fd14890 100644
--- a/test/CodeGen/R600/lshr.ll
+++ b/test/CodeGen/R600/lshr.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: S_LSHR_B32 s{{[0-9]}}, s{{[0-9]}}, 1
+;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
define void @test(i32 %p) {
%i = udiv i32 %p, 2
diff --git a/test/CodeGen/R600/m0-spill.ll b/test/CodeGen/R600/m0-spill.ll
new file mode 100644
index 000000000000..dc9206ed3f95
--- /dev/null
+++ b/test/CodeGen/R600/m0-spill.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+@lds = external addrspace(3) global [64 x float]
+
+; CHECK-LABEL: {{^}}main:
+; CHECK-NOT: v_readlane_b32 m0
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+ %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
+ %cmp = fcmp ueq float 0.0, %4
+ br i1 %cmp, label %if, label %else
+
+if:
+ %lds_ptr = getelementptr [64 x float] addrspace(3)* @lds, i32 0, i32 0
+ %lds_data = load float addrspace(3)* %lds_ptr
+ br label %endif
+
+else:
+ %interp = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
+ br label %endif
+
+endif:
+ %export = phi float [%lds_data, %if], [%interp, %else]
+ %5 = call i32 @llvm.SI.packf16(float %export, float %export)
+ %6 = bitcast i32 %5 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
+ ret void
+}
+
+declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
+
+declare i32 @llvm.SI.packf16(float, float) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/mad-sub.ll b/test/CodeGen/R600/mad-sub.ll
new file mode 100644
index 000000000000..7b4020d2973c
--- /dev/null
+++ b/test/CodeGen/R600/mad-sub.ll
@@ -0,0 +1,215 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare float @llvm.fabs.f32(float) #0
+
+; FUNC-LABEL: {{^}}mad_sub_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tid.ext = sext i32 %tid to i64
+ %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %add1 = add i64 %tid.ext, 1
+ %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %add2 = add i64 %tid.ext, 2
+ %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+ %a = load float addrspace(1)* %gep0, align 4
+ %b = load float addrspace(1)* %gep1, align 4
+ %c = load float addrspace(1)* %gep2, align 4
+ %mul = fmul float %a, %b
+ %sub = fsub float %mul, %c
+ store float %sub, float addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_inv_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tid.ext = sext i32 %tid to i64
+ %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %add1 = add i64 %tid.ext, 1
+ %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %add2 = add i64 %tid.ext, 2
+ %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+ %a = load float addrspace(1)* %gep0, align 4
+ %b = load float addrspace(1)* %gep1, align 4
+ %c = load float addrspace(1)* %gep2, align 4
+ %mul = fmul float %a, %b
+ %sub = fsub float %c, %mul
+ store float %sub, float addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_f64:
+; SI: v_mul_f64
+; SI: v_add_f64
+define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tid.ext = sext i32 %tid to i64
+ %gep0 = getelementptr double addrspace(1)* %ptr, i64 %tid.ext
+ %add1 = add i64 %tid.ext, 1
+ %gep1 = getelementptr double addrspace(1)* %ptr, i64 %add1
+ %add2 = add i64 %tid.ext, 2
+ %gep2 = getelementptr double addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr double addrspace(1)* %out, i64 %tid.ext
+ %a = load double addrspace(1)* %gep0, align 8
+ %b = load double addrspace(1)* %gep1, align 8
+ %c = load double addrspace(1)* %gep2, align 8
+ %mul = fmul double %a, %b
+ %sub = fsub double %mul, %c
+ store double %sub, double addrspace(1)* %outgep, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_fabs_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tid.ext = sext i32 %tid to i64
+ %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %add1 = add i64 %tid.ext, 1
+ %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %add2 = add i64 %tid.ext, 2
+ %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+ %a = load float addrspace(1)* %gep0, align 4
+ %b = load float addrspace(1)* %gep1, align 4
+ %c = load float addrspace(1)* %gep2, align 4
+ %c.abs = call float @llvm.fabs.f32(float %c) #0
+ %mul = fmul float %a, %b
+ %sub = fsub float %mul, %c.abs
+ store float %sub, float addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_fabs_inv_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tid.ext = sext i32 %tid to i64
+ %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %add1 = add i64 %tid.ext, 1
+ %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %add2 = add i64 %tid.ext, 2
+ %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+ %a = load float addrspace(1)* %gep0, align 4
+ %b = load float addrspace(1)* %gep1, align 4
+ %c = load float addrspace(1)* %gep2, align 4
+ %c.abs = call float @llvm.fabs.f32(float %c) #0
+ %mul = fmul float %a, %b
+ %sub = fsub float %c.abs, %mul
+ store float %sub, float addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}neg_neg_mad_f32:
+; SI: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tid.ext = sext i32 %tid to i64
+ %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %add1 = add i64 %tid.ext, 1
+ %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %add2 = add i64 %tid.ext, 2
+ %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+ %a = load float addrspace(1)* %gep0, align 4
+ %b = load float addrspace(1)* %gep1, align 4
+ %c = load float addrspace(1)* %gep2, align 4
+ %nega = fsub float -0.000000e+00, %a
+ %negb = fsub float -0.000000e+00, %b
+ %mul = fmul float %nega, %negb
+ %sub = fadd float %mul, %c
+ store float %sub, float addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}mad_fabs_sub_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tid.ext = sext i32 %tid to i64
+ %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %add1 = add i64 %tid.ext, 1
+ %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %add2 = add i64 %tid.ext, 2
+ %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+ %a = load float addrspace(1)* %gep0, align 4
+ %b = load float addrspace(1)* %gep1, align 4
+ %c = load float addrspace(1)* %gep2, align 4
+ %b.abs = call float @llvm.fabs.f32(float %b) #0
+ %mul = fmul float %a, %b.abs
+ %sub = fsub float %mul, %c
+ store float %sub, float addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fsub_c_fadd_a_a:
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %add = fadd float %r1, %r1
+ %r3 = fsub float %r2, %add
+
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fsub_fadd_a_a_c:
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+ %r1 = load float addrspace(1)* %gep.0
+ %r2 = load float addrspace(1)* %gep.1
+
+ %add = fadd float %r1, %r1
+ %r3 = fsub float %add, %r2
+
+ store float %r3, float addrspace(1)* %gep.out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll
index abb52907b9b8..60f6e15f2f57 100644
--- a/test/CodeGen/R600/mad_int24.ll
+++ b/test/CodeGen/R600/mad_int24.ll
@@ -1,15 +1,17 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; FUNC-LABEL: @i32_mad24
+declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
+
+; FUNC-LABEL: {{^}}i32_mad24:
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
; EG: MULLO_INT
; Make sure we aren't masking the inputs.
; CM-NOT: AND
; CM: MULADD_INT24
-; SI-NOT: AND
-; SI: V_MAD_I32_I24
+; SI-NOT: and
+; SI: v_mad_i32_i24
define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = shl i32 %a, 8
@@ -21,3 +23,12 @@ entry:
store i32 %3, i32 addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: @test_imul24
+; SI: v_mad_i32_i24
+define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
+ %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
+ %add = add i32 %mul, %src2
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll
index 0f0893bd53c4..db776521151a 100644
--- a/test/CodeGen/R600/mad_uint24.ll
+++ b/test/CodeGen/R600/mad_uint24.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; FUNC-LABEL: @u32_mad24
+; FUNC-LABEL: {{^}}u32_mad24:
; EG: MULADD_UINT24
-; SI: V_MAD_U32_U24
+; SI: v_mad_u32_u24
define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
@@ -18,14 +18,14 @@ entry:
ret void
}
-; FUNC-LABEL: @i16_mad24
+; FUNC-LABEL: {{^}}i16_mad24:
; The order of A and B does not matter.
; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
; EG: 16
-; SI: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16
+; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MAD]], 0, 16
define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
entry:
@@ -36,13 +36,13 @@ entry:
ret void
}
-; FUNC-LABEL: @i8_mad24
+; FUNC-LABEL: {{^}}i8_mad24:
; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
; EG: 8
-; SI: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
+; SI: v_mad_u32_u24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
entry:
@@ -60,9 +60,9 @@ entry:
; 24-bit mad pattern wasn't being matched.
; Check that the select instruction is not deleted.
-; FUNC-LABEL: @i24_i32_i32_mad
+; FUNC-LABEL: {{^}}i24_i32_i32_mad:
; EG: CNDE_INT
-; SI: V_CNDMASK
+; SI: v_cndmask
define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
%0 = ashr i32 %a, 8
diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll
index 65a6d2b5fc95..c357524b140f 100644
--- a/test/CodeGen/R600/max-literals.ll
+++ b/test/CodeGen/R600/max-literals.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: @main
+; CHECK-LABEL: {{^}}main:
; CHECK: ADD *
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
@@ -29,7 +29,7 @@ main_body:
ret void
}
-; CHECK: @main
+; CHECK-LABEL: {{^}}main2:
; CHECK-NOT: ADD *
define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll
new file mode 100644
index 000000000000..20af99332453
--- /dev/null
+++ b/test/CodeGen/R600/max.ll
@@ -0,0 +1,99 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imax_sge_i32
+; SI: v_max_i32_e32
+define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp sge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_imax_sge_i32
+; SI: s_max_i32
+define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp sge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_imax_sgt_i32
+; SI: v_max_i32_e32
+define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp sgt i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_imax_sgt_i32
+; SI: s_max_i32
+define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp sgt i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umax_uge_i32
+; SI: v_max_u32_e32
+define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp uge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_umax_uge_i32
+; SI: s_max_u32
+define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp uge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umax_ugt_i32
+; SI: v_max_u32_e32
+define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp ugt i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_umax_ugt_i32
+; SI: s_max_u32
+define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp ugt i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/max3.ll b/test/CodeGen/R600/max3.ll
new file mode 100644
index 000000000000..f905e171b334
--- /dev/null
+++ b/test/CodeGen/R600/max3.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imax3_sgt_i32
+; SI: v_max3_i32
+define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp sgt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp sgt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umax3_ugt_i32
+; SI: v_max3_u32
+define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp ugt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp ugt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll
new file mode 100644
index 000000000000..00ba5c6cddb4
--- /dev/null
+++ b/test/CodeGen/R600/min.ll
@@ -0,0 +1,120 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imin_sle_i32
+; SI: v_min_i32_e32
+define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp sle i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_imin_sle_i32
+; SI: s_min_i32
+define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp sle i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_imin_slt_i32
+; SI: v_min_i32_e32
+define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp slt i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_imin_slt_i32
+; SI: s_min_i32
+define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp slt i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ule_i32
+; SI: v_min_u32_e32
+define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp ule i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_umin_ule_i32
+; SI: s_min_u32
+define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp ule i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ult_i32
+; SI: v_min_u32_e32
+define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp ult i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @s_test_umin_ult_i32
+; SI: s_min_u32
+define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp ult i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
+; SI-NOT: v_min
+; SI: v_cmp_lt_u32
+; SI-NEXT: v_cndmask_b32
+; SI-NOT: v_min
+; SI: s_endpgm
+define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %outgep0 = getelementptr i32 addrspace(1)* %out0, i32 %tid
+ %outgep1 = getelementptr i1 addrspace(1)* %out1, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %cmp = icmp ult i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %outgep0, align 4
+ store i1 %cmp, i1 addrspace(1)* %outgep1
+ ret void
+}
diff --git a/test/CodeGen/R600/min3.ll b/test/CodeGen/R600/min3.ll
new file mode 100644
index 000000000000..6c11a650fcbb
--- /dev/null
+++ b/test/CodeGen/R600/min3.ll
@@ -0,0 +1,111 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imin3_slt_i32
+; SI: v_min3_i32
+define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp slt i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_ult_i32
+; SI: v_min3_u32
+define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %icmp0 = icmp ult i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+ %icmp1 = icmp ult i32 %i0, %c
+ %i1 = select i1 %icmp1, i32 %i0, i32 %c
+ store i32 %i1, i32 addrspace(1)* %outgep, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin_umin_umin
+; SI: v_min_i32
+; SI: v_min3_i32
+define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %tid2 = mul i32 %tid, 2
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+ %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+ %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %d = load i32 addrspace(1)* %gep3, align 4
+
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+ %icmp1 = icmp slt i32 %c, %d
+ %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+ %icmp2 = icmp slt i32 %i0, %i1
+ %i2 = select i1 %icmp2, i32 %i0, i32 %i1
+
+ store i32 %i2, i32 addrspace(1)* %outgep1, align 4
+ ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_2_uses
+; SI-NOT: v_min3
+define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %tid2 = mul i32 %tid, 2
+ %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+ %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+ %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+ %a = load i32 addrspace(1)* %gep0, align 4
+ %b = load i32 addrspace(1)* %gep1, align 4
+ %c = load i32 addrspace(1)* %gep2, align 4
+ %d = load i32 addrspace(1)* %gep3, align 4
+
+ %icmp0 = icmp slt i32 %a, %b
+ %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+ %icmp1 = icmp slt i32 %c, %d
+ %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+ %icmp2 = icmp slt i32 %i0, %c
+ %i2 = select i1 %icmp2, i32 %i0, i32 %c
+
+ store i32 %i2, i32 addrspace(1)* %outgep0, align 4
+ store i32 %i0, i32 addrspace(1)* %outgep1, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/missing-store.ll b/test/CodeGen/R600/missing-store.ll
new file mode 100644
index 000000000000..8ddef35a694a
--- /dev/null
+++ b/test/CodeGen/R600/missing-store.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+
+@ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8
+
+; Make sure when the load from %ptr2 is folded the chain isn't lost,
+; resulting in losing the store to gptr
+
+; FUNC-LABEL: {{^}}missing_store_reduced:
+; SI: ds_read_b64
+; SI: buffer_store_dword
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+ %ptr0 = load i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
+ %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+ store i32 99, i32 addrspace(1)* %gptr, align 4
+ %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+ store i32 %tmp2, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll
index f465d3dad8f0..9c2a17ce04f4 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/R600/mubuf.ll
@@ -1,12 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.r600.read.tidig.x() readnone
;;;==========================================================================;;;
;;; MUBUF LOAD TESTS
;;;==========================================================================;;;
; MUBUF load with an immediate byte offset that fits into 12-bits
-; CHECK-LABEL: @mubuf_load0
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK-LABEL: {{^}}mubuf_load0:
+; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1
@@ -16,8 +18,8 @@ entry:
}
; MUBUF load with the largest possible immediate offset
-; CHECK-LABEL: @mubuf_load1
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
+; CHECK-LABEL: {{^}}mubuf_load1:
+; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = getelementptr i8 addrspace(1)* %in, i64 4095
@@ -27,8 +29,8 @@ entry:
}
; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
-; CHECK-LABEL: @mubuf_load2
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
+; CHECK-LABEL: {{^}}mubuf_load2:
+; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1024
@@ -38,9 +40,9 @@ entry:
}
; MUBUF load with a 12-bit immediate offset and a register offset
-; CHECK-LABEL: @mubuf_load3
+; CHECK-LABEL: {{^}}mubuf_load3:
; CHECK-NOT: ADD
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 %offset
@@ -50,13 +52,52 @@ entry:
ret void
}
+; CHECK-LABEL: {{^}}soffset_max_imm:
+; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
+define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
+main_body:
+ %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+ %tmp2 = shl i32 %6, 2
+ %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp4 = add i32 %6, 16
+ %tmp5 = bitcast float 0.0 to i32
+ call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+ ret void
+}
+
+; Make sure immediates that aren't inline constants don't get folded into
+; the soffset operand.
+; FIXME: for this test we should be smart enough to shift the immediate into
+; the offset field.
+; CHECK-LABEL: {{^}}soffset_no_fold:
+; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
+; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
+define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
+main_body:
+ %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+ %tmp2 = shl i32 %6, 2
+ %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp4 = add i32 %6, 16
+ %tmp5 = bitcast float 0.0 to i32
+ call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+ ret void
+}
+
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
+attributes #3 = { nounwind readonly }
+
;;;==========================================================================;;;
;;; MUBUF STORE TESTS
;;;==========================================================================;;;
; MUBUF store with an immediate byte offset that fits into 12-bits
-; CHECK-LABEL: @mubuf_store0
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK-LABEL: {{^}}mubuf_store0:
+; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1
@@ -65,8 +106,8 @@ entry:
}
; MUBUF store with the largest possible immediate offset
-; CHECK-LABEL: @mubuf_store1
-; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
+; CHECK-LABEL: {{^}}mubuf_store1:
+; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
@@ -76,8 +117,8 @@ entry:
}
; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
-; CHECK-LABEL: @mubuf_store2
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
+; CHECK-LABEL: {{^}}mubuf_store2:
+; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1024
@@ -86,9 +127,9 @@ entry:
}
; MUBUF store with a 12-bit immediate offset and a register offset
-; CHECK-LABEL: @mubuf_store3
+; CHECK-LABEL: {{^}}mubuf_store3:
; CHECK-NOT: ADD
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 %offset
@@ -96,3 +137,35 @@ entry:
store i32 0, i32 addrspace(1)* %1
ret void
}
+
+; CHECK-LABEL: {{^}}store_sgpr_ptr:
+; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
+define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
+ store i32 99, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
+; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
+define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
+ %out.gep = getelementptr i32 addrspace(1)* %out, i32 10
+ store i32 99, i32 addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
+ %out.gep = getelementptr i32 addrspace(1)* %out, i32 32768
+ store i32 99, i32 addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_vgpr_ptr:
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() readnone
+ %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ store i32 99, i32 addrspace(1)* %out.gep, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index d231e92e27fa..bdf18e3ff9ef 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -1,16 +1,16 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; mul24 and mad24 are affected
-; FUNC-LABEL: @test2
+; FUNC-LABEL: {{^}}test_mul_v2i32:
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32> addrspace(1) * %in
%b = load <2 x i32> addrspace(1) * %b_ptr
@@ -19,18 +19,18 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @test4
+; FUNC-LABEL: {{^}}v_mul_v4i32:
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32> addrspace(1) * %in
%b = load <4 x i32> addrspace(1) * %b_ptr
@@ -39,12 +39,26 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @trunc_i64_mul_to_i32
-; SI: S_LOAD_DWORD
-; SI: S_LOAD_DWORD
-; SI: V_MUL_LO_I32
-; SI: BUFFER_STORE_DWORD
-define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+; FUNC-LABEL: {{^}}s_trunc_i64_mul_to_i32:
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: s_mul_i32
+; SI: buffer_store_dword
+define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+ %mul = mul i64 %b, %a
+ %trunc = trunc i64 %mul to i32
+ store i32 %trunc, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_trunc_i64_mul_to_i32:
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: v_mul_lo_i32
+; SI: buffer_store_dword
+define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+ %a = load i64 addrspace(1)* %aptr, align 8
+ %b = load i64 addrspace(1)* %bptr, align 8
%mul = mul i64 %b, %a
%trunc = trunc i64 %mul to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -53,11 +67,11 @@ define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top
; 32-bits of both arguments are sign bits.
-; FUNC-LABEL: @mul64_sext_c
+; FUNC-LABEL: {{^}}mul64_sext_c:
; EG-DAG: MULLO_INT
; EG-DAG: MULHI_INT
-; SI-DAG: V_MUL_LO_I32
-; SI-DAG: V_MUL_HI_I32
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_i32
define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
entry:
%0 = sext i32 %in to i64
@@ -66,16 +80,120 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}v_mul64_sext_c:
+; EG-DAG: MULLO_INT
+; EG-DAG: MULHI_INT
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_mul_hi_i32
+; SI: s_endpgm
+define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %val = load i32 addrspace(1)* %in, align 4
+ %ext = sext i32 %val to i64
+ %mul = mul i64 %ext, 80
+ store i64 %mul, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm:
+; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
+; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
+; SI: s_endpgm
+define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %val = load i32 addrspace(1)* %in, align 4
+ %ext = sext i32 %val to i64
+ %mul = mul i64 %ext, 9
+ store i64 %mul, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_mul_i32:
+; SI: s_load_dword [[SRC0:s[0-9]+]],
+; SI: s_load_dword [[SRC1:s[0-9]+]],
+; SI: s_mul_i32 [[SRESULT:s[0-9]+]], [[SRC0]], [[SRC1]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
+define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %mul = mul i32 %a, %b
+ store i32 %mul, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul_i32:
+; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %result = mul i32 %a, %b
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
; A standard 64-bit multiply. The expansion should be around 6 instructions.
; It would be difficult to match the expansion correctly without writing
; a really complicated list of FileCheck expressions. I don't want
; to confuse people who may 'break' this test with a correct optimization,
; so this test just uses FUNC-LABEL to make sure the compiler does not
; crash with a 'failed to select' error.
-; FUNC-LABEL: @mul64
-define void @mul64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+
+; FUNC-LABEL: {{^}}s_mul_i64:
+define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+ %mul = mul i64 %a, %b
+ store i64 %mul, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul_i64:
+; SI: v_mul_lo_i32
+define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+ %a = load i64 addrspace(1)* %aptr, align 8
+ %b = load i64 addrspace(1)* %bptr, align 8
+ %mul = mul i64 %a, %b
+ store i64 %mul, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}mul32_in_branch:
+; SI: s_mul_i32
+define void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) {
+entry:
+ %0 = icmp eq i32 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = load i32 addrspace(1)* %in
+ br label %endif
+
+else:
+ %2 = mul i32 %a, %b
+ br label %endif
+
+endif:
+ %3 = phi i32 [%1, %if], [%2, %else]
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}mul64_in_branch:
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_u32
+; SI: s_endpgm
+define void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
- %0 = mul i64 %a, %b
- store i64 %0, i64 addrspace(1)* %out
+ %0 = icmp eq i64 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = load i64 addrspace(1)* %in
+ br label %endif
+
+else:
+ %2 = mul i64 %a, %b
+ br label %endif
+
+endif:
+ %3 = phi i64 [%1, %if], [%2, %else]
+ store i64 %3, i64 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll
index 046911ba147d..eecde0d73af4 100644
--- a/test/CodeGen/R600/mul_int24.ll
+++ b/test/CodeGen/R600/mul_int24.ll
@@ -1,15 +1,15 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; FUNC-LABEL: @i32_mul24
+; FUNC-LABEL: {{^}}i32_mul24:
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
; EG: MULLO_INT
; Make sure we are not masking the inputs
; CM-NOT: AND
; CM: MUL_INT24
-; SI-NOT: AND
-; SI: V_MUL_I32_I24
+; SI-NOT: and
+; SI: v_mul_i32_i24
define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll
index 419f2751b83e..6d526c4b5d78 100644
--- a/test/CodeGen/R600/mul_uint24.ll
+++ b/test/CodeGen/R600/mul_uint24.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; FUNC-LABEL: @u32_mul24
+; FUNC-LABEL: {{^}}u32_mul24:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI: V_MUL_U32_U24
+; SI: v_mul_u32_u24
define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
@@ -17,13 +17,13 @@ entry:
ret void
}
-; FUNC-LABEL: @i16_mul24
+; FUNC-LABEL: {{^}}i16_mul24:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
; EG: 16
-; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
+; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%0 = mul i16 %a, %b
@@ -32,12 +32,12 @@ entry:
ret void
}
-; FUNC-LABEL: @i8_mul24
+; FUNC-LABEL: {{^}}i8_mul24:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
+; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
entry:
@@ -48,12 +48,12 @@ entry:
}
; Multiply with 24-bit inputs and 64-bit output
-; FUNC_LABEL: @mul24_i64
+; FUNC_LABEL: {{^}}mul24_i64:
; EG; MUL_UINT24
; EG: MULHI
-; SI: V_MUL_U32_U24
+; SI: v_mul_u32_u24
; FIXME: SI support 24-bit mulhi
-; SI: V_MUL_HI_U32
+; SI: v_mul_hi_u32
define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = shl i64 %a, 40
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
index 864012748118..865971712306 100644
--- a/test/CodeGen/R600/mulhu.ll
+++ b/test/CodeGen/R600/mulhu.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, 0xaaaaaaab
-;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
-;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0
+;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
+;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
+;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
define void @test(i32 %p) {
%i = udiv i32 %p, 3
diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
index ab82e7ee7993..3c5e127625d6 100644
--- a/test/CodeGen/R600/no-initializer-constant-addrspace.ll
+++ b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -o /dev/null %s
+; RUN: llc -march=amdgcn -mcpu=SI -o /dev/null %s
; RUN: llc -march=r600 -mcpu=cypress -o /dev/null %s
@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
-; FUNC-LABEL: @load_extern_const_init
+; FUNC-LABEL: {{^}}load_extern_const_init:
define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
%val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
@@ -12,7 +12,7 @@ define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
-; FUNC-LABEL: @load_undef_const_init
+; FUNC-LABEL: {{^}}load_undef_const_init:
define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
%val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/no-shrink-extloads.ll b/test/CodeGen/R600/no-shrink-extloads.ll
new file mode 100644
index 000000000000..135d22d3036d
--- /dev/null
+++ b/test/CodeGen/R600/no-shrink-extloads.ll
@@ -0,0 +1,191 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; Make sure we don't turn the 32-bit argument load into a 16-bit
+; load. There aren't extending scalar lods, so that would require
+; using a buffer_load instruction.
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16:
+; SI: s_load_dword s
+; SI: buffer_store_short v
+define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
+ %trunc = trunc i32 %arg to i16
+ store i16 %trunc, i16 addrspace(1)* %out
+ ret void
+}
+
+; It should be OK (and probably performance neutral) to reduce this,
+; but we don't know if the load is uniform yet.
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16:
+; SI: buffer_load_dword v
+; SI: buffer_store_short v
+define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i16 addrspace(1)* %out, i32 %tid
+ %load = load i32 addrspace(1)* %gep.in
+ %trunc = trunc i32 %load to i16
+ store i16 %trunc, i16 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
+ %trunc = trunc i32 %arg to i8
+ store i8 %trunc, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+ %load = load i32 addrspace(1)* %gep.in
+ %trunc = trunc i32 %load to i8
+ store i8 %trunc, i8 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
+ %trunc = trunc i32 %arg to i1
+ store i1 %trunc, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i1 addrspace(1)* %out, i32 %tid
+ %load = load i32 addrspace(1)* %gep.in
+ %trunc = trunc i32 %load to i1
+ store i1 %trunc, i1 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32:
+; SI: s_load_dword s
+; SI: buffer_store_dword v
+define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
+ %trunc = trunc i64 %arg to i32
+ store i32 %trunc, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32:
+; SI: buffer_load_dword v
+; SI: buffer_store_dword v
+define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %load = load i64 addrspace(1)* %gep.in
+ %trunc = trunc i64 %load to i32
+ store i32 %trunc, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32:
+; SI: s_load_dword s
+; SI: buffer_store_dword v
+define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
+ %srl = lshr i64 %arg, 32
+ %trunc = trunc i64 %srl to i32
+ store i32 %trunc, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32:
+; SI: buffer_load_dword v
+; SI: buffer_store_dword v
+define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %load = load i64 addrspace(1)* %gep.in
+ %srl = lshr i64 %load, 32
+ %trunc = trunc i64 %srl to i32
+ store i32 %trunc, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; Might as well reduce to 8-bit loads.
+; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
+ %trunc = trunc i16 %arg to i8
+ store i8 %trunc, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8:
+; SI: buffer_load_ubyte v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i16 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+ %load = load i16 addrspace(1)* %gep.in
+ %trunc = trunc i16 %load to i8
+ store i8 %trunc, i8 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
+ %srl = lshr i64 %arg, 32
+ %trunc = trunc i64 %srl to i8
+ store i8 %trunc, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+ %load = load i64 addrspace(1)* %gep.in
+ %srl = lshr i64 %load, 32
+ %trunc = trunc i64 %srl to i8
+ store i8 %trunc, i8 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
+ %trunc = trunc i64 %arg to i8
+ store i8 %trunc, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+ %load = load i64 addrspace(1)* %gep.in
+ %trunc = trunc i64 %load to i8
+ store i8 %trunc, i8 addrspace(1)* %gep.out
+ ret void
+}
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
new file mode 100644
index 000000000000..88a8145dcd62
--- /dev/null
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: {{^}}fold_sgpr:
+; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
+define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
+entry:
+ %tmp0 = icmp ne i32 %fold, 0
+ br i1 %tmp0, label %if, label %endif
+
+if:
+ %id = call i32 @llvm.r600.read.tidig.x()
+ %offset = add i32 %fold, %id
+ %tmp1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+ store i32 0, i32 addrspace(1)* %tmp1
+ br label %endif
+
+endif:
+ ret void
+}
+
+; CHECK-LABEL: {{^}}fold_imm:
+; CHECK v_or_i32_e32 v{{[0-9]+}}, 5
+define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
+entry:
+ %fold = add i32 3, 2
+ %tmp0 = icmp ne i32 %cmp, 0
+ br i1 %tmp0, label %if, label %endif
+
+if:
+ %id = call i32 @llvm.r600.read.tidig.x()
+ %val = or i32 %id, %fold
+ store i32 %val, i32 addrspace(1)* %out
+ br label %endif
+
+endif:
+ ret void
+}
+
+; CHECK-LABEL: {{^}}fold_64bit_constant_add:
+; CHECK-NOT: s_mov_b64
+; FIXME: It would be better if we could use v_add here and drop the extra
+; v_mov_b32 instructions.
+; CHECK-DAG: s_add_u32 [[LO:s[0-9]+]], s{{[0-9]+}}, 1
+; CHECK-DAG: s_addc_u32 [[HI:s[0-9]+]], s{{[0-9]+}}, 0
+; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},
+
+define void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
+entry:
+ %tmp0 = add i64 %val, 1
+ store i64 %tmp0, i64 addrspace(1)* %out
+ ret void
+}
+
+; Inline constants should always be folded.
+
+; CHECK-LABEL: {{^}}vector_inline:
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+
+define void @vector_inline(<4 x i32> addrspace(1)* %out) {
+entry:
+ %tmp0 = call i32 @llvm.r600.read.tidig.x()
+ %tmp1 = add i32 %tmp0, 1
+ %tmp2 = add i32 %tmp0, 2
+ %tmp3 = add i32 %tmp0, 3
+ %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
+ %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
+ %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
+ %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
+ %tmp4 = xor <4 x i32> <i32 5, i32 5, i32 5, i32 5>, %vec3
+ store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; Immediates with one use should be folded
+; CHECK-LABEL: {{^}}imm_one_use:
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
+
+define void @imm_one_use(i32 addrspace(1)* %out) {
+entry:
+ %tmp0 = call i32 @llvm.r600.read.tidig.x()
+ %tmp1 = xor i32 %tmp0, 100
+ store i32 %tmp1, i32 addrspace(1)* %out
+ ret void
+}
+; CHECK-LABEL: {{^}}vector_imm:
+; CHECK: s_movk_i32 [[IMM:s[0-9]+]], 0x64
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+
+define void @vector_imm(<4 x i32> addrspace(1)* %out) {
+entry:
+ %tmp0 = call i32 @llvm.r600.read.tidig.x()
+ %tmp1 = add i32 %tmp0, 1
+ %tmp2 = add i32 %tmp0, 2
+ %tmp3 = add i32 %tmp0, 3
+ %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
+ %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
+ %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
+ %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
+ %tmp4 = xor <4 x i32> <i32 100, i32 100, i32 100, i32 100>, %vec3
+ store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/operand-spacing.ll b/test/CodeGen/R600/operand-spacing.ll
new file mode 100644
index 000000000000..dd9f25aad7f2
--- /dev/null
+++ b/test/CodeGen/R600/operand-spacing.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+; Make sure there isn't an extra space between the instruction name and first operands.
+
+; SI-LABEL: {{^}}add_f32:
+; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
+; SI: buffer_store_dword [[RESULT]],
+define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
+ %result = fadd float %a, %b
+ store float %result, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 3c3b475d077c..d7dfb7ab1c2e 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
-; EG-LABEL: @or_v2i32
+; EG-LABEL: {{^}}or_v2i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-LABEL: @or_v2i32
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI-LABEL: {{^}}or_v2i32:
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +18,17 @@ define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in)
ret void
}
-; EG-LABEL: @or_v4i32
+; EG-LABEL: {{^}}or_v4i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-LABEL: @or_v4i32
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI-LABEL: {{^}}or_v4i32:
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,16 +39,16 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in)
ret void
}
-; SI-LABEL: @scalar_or_i32
-; SI: S_OR_B32
+; SI-LABEL: {{^}}scalar_or_i32:
+; SI: s_or_b32
define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%or = or i32 %a, %b
store i32 %or, i32 addrspace(1)* %out
ret void
}
-; SI-LABEL: @vector_or_i32
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: {{^}}vector_or_i32:
+; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
%loada = load i32 addrspace(1)* %a
%or = or i32 %loada, %b
@@ -56,20 +56,46 @@ define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b)
ret void
}
-; EG-LABEL: @scalar_or_i64
+; SI-LABEL: {{^}}scalar_or_literal_i32:
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
+define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
+ %or = or i32 %a, 99999
+ store i32 %or, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}vector_or_literal_i32:
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
+define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+ %loada = load i32 addrspace(1)* %a, align 4
+ %or = or i32 %loada, 65535
+ store i32 %or, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}vector_or_inline_immediate_i32:
+; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
+define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+ %loada = load i32 addrspace(1)* %a, align 4
+ %or = or i32 %loada, 4
+ store i32 %or, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; EG-LABEL: {{^}}scalar_or_i64:
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-LABEL: @scalar_or_i64
-; SI: S_OR_B64
+; SI-LABEL: {{^}}scalar_or_i64:
+; SI: s_or_b64
define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%or = or i64 %a, %b
store i64 %or, i64 addrspace(1)* %out
ret void
}
-; SI-LABEL: @vector_or_i64
-; SI: V_OR_B32_e32 v{{[0-9]}}
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: {{^}}vector_or_i64:
+; SI: v_or_b32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64 addrspace(1)* %a, align 8
%loadb = load i64 addrspace(1)* %a, align 8
@@ -78,9 +104,9 @@ define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
ret void
}
-; SI-LABEL: @scalar_vector_or_i64
-; SI: V_OR_B32_e32 v{{[0-9]}}
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: {{^}}scalar_vector_or_i64:
+; SI: v_or_b32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
%loada = load i64 addrspace(1)* %a
%or = or i64 %loada, %b
@@ -88,13 +114,13 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
ret void
}
-; SI-LABEL: @vector_or_i64_loadimm
-; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x146f
-; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}vector_or_i64_loadimm:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
+; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 22470723082367
@@ -103,11 +129,11 @@ define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
}
; FIXME: The or 0 should really be removed.
-; SI-LABEL: @vector_or_i64_imm
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI: V_OR_B32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
-; SI: V_OR_B32_e32 {{v[0-9]+}}, 0, {{.*}}
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}vector_or_i64_imm:
+; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
+; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
+; SI: s_endpgm
define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 8
@@ -115,12 +141,12 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
ret void
}
-; SI-LABEL: @trunc_i64_or_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
-; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI-LABEL: {{^}}trunc_i64_or_to_i32:
+; SI: s_load_dword s[[SREG0:[0-9]+]]
+; SI: s_load_dword s[[SREG1:[0-9]+]]
+; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = or i64 %b, %a
%trunc = trunc i64 %add to i32
@@ -128,11 +154,11 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
ret void
}
-; EG-CHECK: @or_i1
+; EG-CHECK: {{^}}or_i1:
; EG-CHECK: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-; SI-CHECK: @or_i1
-; SI-CHECK: S_OR_B64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: {{^}}or_i1:
+; SI-CHECK: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float addrspace(1) * %in0
%b = load float addrspace(1) * %in1
diff --git a/test/CodeGen/R600/packetizer.ll b/test/CodeGen/R600/packetizer.ll
index 0a405c57ea93..49a7c0df748f 100644
--- a/test/CodeGen/R600/packetizer.ll
+++ b/test/CodeGen/R600/packetizer.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
-; CHECK: @test
+; CHECK: {{^}}test:
; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X
; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Y
; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z
diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll
index 8a269e0cb43a..82b11501e865 100644
--- a/test/CodeGen/R600/parallelandifcollapse.ll
+++ b/test/CodeGen/R600/parallelandifcollapse.ll
@@ -1,5 +1,5 @@
; Function Attrs: nounwind
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca < %s | FileCheck %s
;
; CFG flattening should use parallel-and mode to generate branch conditions and
; then merge if-regions with the same bodies.
@@ -11,7 +11,6 @@
; FIXME: For some reason having the allocas here allowed the flatten cfg pass
; to do its transfomation, however now that we are using local memory for
; allocas, the transformation isn't happening.
-; XFAIL: *
define void @_Z9chk1D_512v() #0 {
entry:
diff --git a/test/CodeGen/R600/predicate-dp4.ll b/test/CodeGen/R600/predicate-dp4.ll
index e48d6a7aa9a8..6bc187594359 100644
--- a/test/CodeGen/R600/predicate-dp4.ll
+++ b/test/CodeGen/R600/predicate-dp4.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=cayman
-; CHECK-LABEL: @main
+; CHECK-LABEL: {{^}}main:
; CHECK: PRED_SETE_INT * Pred,
; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one
define void @main(<4 x float> inreg) #0 {
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index 902508ff9e05..0ce74d97ba8e 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -3,7 +3,7 @@
; These tests make sure the compiler is optimizing branches using predicates
; when it is legal to do so.
-; CHECK: @simple_if
+; CHECK: {{^}}simple_if:
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
define void @simple_if(i32 addrspace(1)* %out, i32 %in) {
@@ -21,7 +21,7 @@ ENDIF:
ret void
}
-; CHECK: @simple_if_else
+; CHECK: {{^}}simple_if_else:
; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
@@ -44,7 +44,7 @@ ENDIF:
ret void
}
-; CHECK: @nested_if
+; CHECK: {{^}}nested_if:
; CHECK: ALU_PUSH_BEFORE
; CHECK: JUMP
; CHECK: POP
@@ -71,7 +71,7 @@ ENDIF:
ret void
}
-; CHECK: @nested_if_else
+; CHECK: {{^}}nested_if_else:
; CHECK: ALU_PUSH_BEFORE
; CHECK: JUMP
; CHECK: POP
diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll
index def4f9dee521..765563783831 100644
--- a/test/CodeGen/R600/private-memory-atomics.ll
+++ b/test/CodeGen/R600/private-memory-atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s
; This works because promote allocas pass replaces these with LDS atomics.
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll
index 40860858eb0f..173309496135 100644
--- a/test/CodeGen/R600/private-memory-broken.ll
+++ b/test/CodeGen/R600/private-memory-broken.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -verify-machineinstrs -march=r600 -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s
; Make sure promote alloca pass doesn't crash
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 124d9fa64505..15153c69a48d 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,23 +1,23 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
-; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @mova_same_clause
+; FUNC-LABEL: {{^}}mova_same_clause:
; R600: LDS_WRITE
; R600: LDS_WRITE
; R600: LDS_READ
; R600: LDS_READ
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
@@ -45,10 +45,10 @@ entry:
; XXX: This generated code has unnecessary MOVs, we should be able to optimize
; this.
-; FUNC-LABEL: @multiple_structs
+; FUNC-LABEL: {{^}}multiple_structs:
; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
+; SI-NOT: v_movrel
%struct.point = type { i32, i32 }
define void @multiple_structs(i32 addrspace(1)* %out) {
@@ -76,9 +76,9 @@ entry:
; loads and stores should be lowered to copies, so there shouldn't be any
; MOVA instructions.
-; FUNC-LABEL: @direct_loop
+; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
@@ -112,14 +112,13 @@ for.end:
ret void
}
-; FUNC-LABEL: @short_array
+; FUNC-LABEL: {{^}}short_array:
; R600: MOVA_INT
-; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-PROMOTE-NOT: MOVREL
-; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}} + s{{[0-9]+}}
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0
+; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i16]
@@ -134,12 +133,12 @@ entry:
ret void
}
-; FUNC-LABEL: @char_array
+; FUNC-LABEL: {{^}}char_array:
; R600: MOVA_INT
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x0
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x1
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i8]
@@ -157,12 +156,12 @@ entry:
; Make sure we don't overwrite workitem information with private memory
-; FUNC-LABEL: @work_item_info
+; FUNC-LABEL: {{^}}work_item_info:
; R600-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X
-; SI-NOT: V_MOV_B32_e{{(32|64)}} v0
+; SI-NOT: v_mov_b32_e{{(32|64)}} v0
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [2 x i32]
@@ -180,11 +179,11 @@ entry:
; Test that two stack objects are not stored in the same register
; The second stack object should be in T3.X
-; FUNC-LABEL: @no_overlap
+; FUNC-LABEL: {{^}}no_overlap:
; R600_CHECK: MOV
; R600_CHECK: [[CHAN:[XYZW]]]+
; R600-NOT: [[CHAN]]+
-; SI: V_MOV_B32_e32 v3
+; SI: v_mov_b32_e32 v3
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [3 x i8], align 1
@@ -291,3 +290,22 @@ entry:
ret void
}
+; AMDGPUPromoteAlloca does not know how to handle ptrtoint. When it
+; finds one, it should stop trying to promote.
+
+; FUNC-LABEL: ptrtoint:
+; SI-NOT: ds_write
+; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
+define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+ %alloca = alloca [16 x i32]
+ %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a
+ store i32 5, i32* %tmp0
+ %tmp1 = ptrtoint [16 x i32]* %alloca to i32
+ %tmp2 = add i32 %tmp1, 5
+ %tmp3 = inttoptr i32 %tmp2 to i32*
+ %tmp4 = getelementptr i32* %tmp3, i32 %b
+ %tmp5 = load i32* %tmp4
+ store i32 %tmp5, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 55eb56d3fb1d..1908f15949a2 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 | FileCheck %s
-;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-;CHECK: MAX T{{[0-9].[XYZW]}}, PV.X, 0.0
+; CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
+; CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
main_body:
diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll
index b760c882f4e3..112cdac0a1b8 100644
--- a/test/CodeGen/R600/r600-encoding.ll
+++ b/test/CodeGen/R600/r600-encoding.ll
@@ -4,10 +4,10 @@
; The earliest R600 GPUs have a slightly different encoding than the rest of
; the VLIW4/5 GPUs.
-; EG-CHECK: @test
+; EG-CHECK: {{^}}test:
; EG-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}]
-; R600-CHECK: @test
+; R600-CHECK: {{^}}test:
; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
define void @test(<4 x float> inreg %reg0) #0 {
diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll
index 73bc0635ab21..7d7285632078 100644
--- a/test/CodeGen/R600/r600-export-fix.ll
+++ b/test/CodeGen/R600/r600-export-fix.ll
@@ -3,9 +3,9 @@
;CHECK: EXPORT T{{[0-9]}}.XYZW
;CHECK: EXPORT T{{[0-9]}}.0000
;CHECK: EXPORT T{{[0-9]}}.0000
-;CHECK: EXPORT T{{[0-9]}}.0XZW
+;CHECK: EXPORT T{{[0-9]}}.0XYZ
;CHECK: EXPORT T{{[0-9]}}.XYZW
-;CHECK: EXPORT T{{[0-9]}}.YX00
+;CHECK: EXPORT T{{[0-9]}}.YZ00
;CHECK: EXPORT T{{[0-9]}}.0000
;CHECK: EXPORT T{{[0-9]}}.0000
diff --git a/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll b/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll
index c89398f00662..f388f8ffe293 100644
--- a/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll
+++ b/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll
@@ -1,5 +1,4 @@
;RUN: llc < %s -march=r600 -mcpu=cayman
-;REQUIRES: asserts
define void @main(<4 x float> inreg, <4 x float> inreg) #0 {
main_body:
diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
index 6dee3ef89cf5..dddc9de7e963 100644
--- a/test/CodeGen/R600/r600cfg.ll
+++ b/test/CodeGen/R600/r600cfg.ll
@@ -1,5 +1,4 @@
;RUN: llc < %s -march=r600 -mcpu=redwood
-;REQUIRES: asserts
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll
index 329077cde57d..2b49f977def7 100644
--- a/test/CodeGen/R600/register-count-comments.ll
+++ b/test/CodeGen/R600/register-count-comments.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
-; SI-LABEL: @foo:
+; SI-LABEL: {{^}}foo:
; SI: .section .AMDGPU.csdata
; SI: ; Kernel info:
; SI: ; NumSgprs: {{[0-9]+}}
@@ -18,3 +18,10 @@ define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 a
store i32 %result, i32 addrspace(1)* %outptr, align 4
ret void
}
+
+; SI-LABEL: {{^}}one_vgpr_used:
+; SI: NumVgprs: 1
+define void @one_vgpr_used(i32 addrspace(1)* %out, i32 %x) nounwind {
+ store i32 %x, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/R600/reorder-stores.ll
index be2fcc6849fb..81e424aab21e 100644
--- a/test/CodeGen/R600/reorder-stores.ll
+++ b/test/CodeGen/R600/reorder-stores.ll
@@ -1,15 +1,15 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @no_reorder_v2f64_global_load_store
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <2 x double> addrspace(1)* %x, align 16
%tmp4 = load <2 x double> addrspace(1)* %y, align 16
@@ -18,12 +18,12 @@ define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocap
ret void
}
-; SI-LABEL: @no_reorder_scalarized_v2f64_local_load_store
-; SI: DS_READ_B64
-; SI: DS_READ_B64
-; SI: DS_WRITE_B64
-; SI: DS_WRITE_B64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_write_b64
+; SI: ds_write_b64
+; SI: s_endpgm
define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x double> addrspace(3)* %x, align 16
%tmp4 = load <2 x double> addrspace(3)* %y, align 16
@@ -32,48 +32,48 @@ define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace
ret void
}
-; SI-LABEL: @no_reorder_split_v8i32_global_load_store
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <8 x i32> addrspace(1)* %x, align 32
%tmp4 = load <8 x i32> addrspace(1)* %y, align 32
@@ -82,13 +82,13 @@ define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* no
ret void
}
-; SI-LABEL: @no_reorder_extload_64
-; SI: DS_READ_B64
-; SI: DS_READ_B64
-; SI: DS_WRITE_B64
-; SI-NOT: DS_READ
-; SI: DS_WRITE_B64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}no_reorder_extload_64:
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_write_b64
+; SI-NOT: ds_read
+; SI: ds_write_b64
+; SI: s_endpgm
define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x i32> addrspace(3)* %x, align 8
%tmp4 = load <2 x i32> addrspace(3)* %y, align 8
diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll
index bda0b6694a8d..13f251ee63e1 100644
--- a/test/CodeGen/R600/rotl.i64.ll
+++ b/test/CodeGen/R600/rotl.i64.ll
@@ -1,10 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @s_rotl_i64:
-; SI: S_LSHL_B64
-; SI: S_SUB_I32
-; SI: S_LSHR_B64
-; SI: S_OR_B64
+; FUNC-LABEL: {{^}}s_rotl_i64:
+; SI-DAG: s_lshl_b64
+; SI-DAG: s_sub_i32
+; SI-DAG: s_lshr_b64
+; SI: s_or_b64
+; SI: s_endpgm
define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
entry:
%0 = shl i64 %x, %y
@@ -15,12 +16,13 @@ entry:
ret void
}
-; FUNC-LABEL: @v_rotl_i64:
-; SI: V_LSHL_B64
-; SI: V_SUB_I32
-; SI: V_LSHR_B64
-; SI: V_OR_B32
-; SI: V_OR_B32
+; FUNC-LABEL: {{^}}v_rotl_i64:
+; SI-DAG: v_lshl_b64
+; SI-DAG: v_sub_i32
+; SI: v_lshr_b64
+; SI: v_or_b32
+; SI: v_or_b32
+; SI: s_endpgm
define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
%x = load i64 addrspace(1)* %xptr, align 8
diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll
index 83f657fd4cce..bcf8890beeba 100644
--- a/test/CodeGen/R600/rotl.ll
+++ b/test/CodeGen/R600/rotl.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @rotl_i32:
+; FUNC-LABEL: {{^}}rotl_i32:
; R600: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
; R600-NEXT: 32
; R600: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
-; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
-; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+; SI: s_sub_i32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
+; SI: v_mov_b32_e32 [[VDST:v[0-9]+]], [[SDST]]
+; SI: v_alignbit_b32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%0 = shl i32 %x, %y
@@ -19,11 +19,12 @@ entry:
ret void
}
-; FUNC-LABEL: @rotl_v2i32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
+; FUNC-LABEL: {{^}}rotl_v2i32:
+; SI-DAG: s_sub_i32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI: s_endpgm
define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
entry:
%0 = shl <2 x i32> %x, %y
@@ -34,15 +35,16 @@ entry:
ret void
}
-; FUNC-LABEL: @rotl_v4i32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
+; FUNC-LABEL: {{^}}rotl_v4i32:
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI: s_endpgm
define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
entry:
%0 = shl <4 x i32> %x, %y
diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll
index c264751baeb1..4568859b1ae9 100644
--- a/test/CodeGen/R600/rotr.i64.ll
+++ b/test/CodeGen/R600/rotr.i64.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @s_rotr_i64
-; SI: S_LSHR_B64
-; SI: S_SUB_I32
-; SI: S_LSHL_B64
-; SI: S_OR_B64
+; FUNC-LABEL: {{^}}s_rotr_i64:
+; SI-DAG: s_sub_i32
+; SI-DAG: s_lshr_b64
+; SI-DAG: s_lshl_b64
+; SI: s_or_b64
define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
entry:
%tmp0 = sub i64 64, %y
@@ -15,12 +15,12 @@ entry:
ret void
}
-; FUNC-LABEL: @v_rotr_i64
-; SI: V_LSHR_B64
-; SI: V_SUB_I32
-; SI: V_LSHL_B64
-; SI: V_OR_B32
-; SI: V_OR_B32
+; FUNC-LABEL: {{^}}v_rotr_i64:
+; SI-DAG: v_sub_i32
+; SI-DAG: v_lshr_b64
+; SI-DAG: v_lshl_b64
+; SI: v_or_b32
+; SI: v_or_b32
define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
%x = load i64 addrspace(1)* %xptr, align 8
@@ -33,7 +33,7 @@ entry:
ret void
}
-; FUNC-LABEL: @s_rotr_v2i64
+; FUNC-LABEL: {{^}}s_rotr_v2i64:
define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
entry:
%tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
@@ -44,7 +44,7 @@ entry:
ret void
}
-; FUNC-LABEL: @v_rotr_v2i64
+; FUNC-LABEL: {{^}}v_rotr_v2i64:
define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
entry:
%x = load <2 x i64> addrspace(1)* %xptr, align 8
diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll
index a5a4da480738..dcd036e450da 100644
--- a/test/CodeGen/R600/rotr.ll
+++ b/test/CodeGen/R600/rotr.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @rotr_i32:
+; FUNC-LABEL: {{^}}rotr_i32:
; R600: BIT_ALIGN_INT
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
entry:
%tmp0 = sub i32 32, %y
@@ -15,12 +15,12 @@ entry:
ret void
}
-; FUNC-LABEL: @rotr_v2i32:
+; FUNC-LABEL: {{^}}rotr_v2i32:
; R600: BIT_ALIGN_INT
; R600: BIT_ALIGN_INT
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
entry:
%tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
@@ -31,16 +31,16 @@ entry:
ret void
}
-; FUNC-LABEL: @rotr_v4i32:
+; FUNC-LABEL: {{^}}rotr_v4i32:
; R600: BIT_ALIGN_INT
; R600: BIT_ALIGN_INT
; R600: BIT_ALIGN_INT
; R600: BIT_ALIGN_INT
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
entry:
%tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll
index 3069f62724b7..b8a23df63d83 100644
--- a/test/CodeGen/R600/rsq.ll
+++ b/test/CodeGen/R600/rsq.ll
@@ -1,12 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare float @llvm.sqrt.f32(float) nounwind readnone
declare double @llvm.sqrt.f64(double) nounwind readnone
-; SI-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}rsq_f32:
+; SI: v_rsq_f32_e32
+; SI: s_endpgm
define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
@@ -15,10 +16,10 @@ define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noali
ret void
}
-; SI-LABEL: @rsq_f64
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE: V_SQRT_F64_e32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}rsq_f64:
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE: v_sqrt_f64_e32
+; SI: s_endpgm
define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
%val = load double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
@@ -26,3 +27,48 @@ define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noa
store double %div, double addrspace(1)* %out, align 4
ret void
}
+
+; SI-LABEL: {{^}}rsq_f32_sgpr:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: s_endpgm
+define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+ %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+ %div = fdiv float 1.0, %sqrt
+ store float %div, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Recognize that this is rsqrt(a) * rcp(b) * c,
+; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
+
+; SI-LABEL: @rsqrt_fmul
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+
+; SI-UNSAFE-DAG: v_rsq_f32_e32 [[RSQA:v[0-9]+]], [[A]]
+; SI-UNSAFE-DAG: v_rcp_f32_e32 [[RCPB:v[0-9]+]], [[B]]
+; SI-UNSAFE-DAG: v_mul_f32_e32 [[TMP:v[0-9]+]], [[RCPB]], [[RSQA]]
+; SI-UNSAFE: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
+; SI-UNSAFE: buffer_store_dword [[RESULT]]
+
+; SI-SAFE-NOT: v_rsq_f32
+
+; SI: s_endpgm
+define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
+
+ %a = load float addrspace(1)* %gep.0
+ %b = load float addrspace(1)* %gep.1
+ %c = load float addrspace(1)* %gep.2
+
+ %x = call float @llvm.sqrt.f32(float %a)
+ %y = fmul float %x, %b
+ %z = fdiv float %c, %y
+ store float %z, float addrspace(1)* %out.gep
+ ret void
+}
diff --git a/test/CodeGen/R600/s_movk_i32.ll b/test/CodeGen/R600/s_movk_i32.ll
new file mode 100644
index 000000000000..469a6ba3a07e
--- /dev/null
+++ b/test/CodeGen/R600/s_movk_i32.ll
@@ -0,0 +1,184 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}s_movk_i32_k0:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k1:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k2:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k3:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k4:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k5:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k6:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k7:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}}
+; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+
+; SI-LABEL: {{^}}s_movk_i32_k8:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k9:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k10:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k11:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k12:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+ %loada = load i64 addrspace(1)* %a, align 4
+ %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/R600/saddo.ll
index c80480e85512..2f5f9af838fa 100644
--- a/test/CodeGen/R600/saddo.ll
+++ b/test/CodeGen/R600/saddo.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
-; FUNC-LABEL: @saddo_i64_zext
+; FUNC-LABEL: {{^}}saddo_i64_zext:
define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %sadd, 0
@@ -15,7 +15,7 @@ define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; FUNC-LABEL: @s_saddo_i32
+; FUNC-LABEL: {{^}}s_saddo_i32:
define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %sadd, 0
@@ -25,7 +25,7 @@ define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @v_saddo_i32
+; FUNC-LABEL: {{^}}v_saddo_i32:
define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
@@ -37,7 +37,7 @@ define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @s_saddo_i64
+; FUNC-LABEL: {{^}}s_saddo_i64:
define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %sadd, 0
@@ -47,9 +47,9 @@ define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
ret void
}
-; FUNC-LABEL: @v_saddo_i64
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; FUNC-LABEL: {{^}}v_saddo_i64:
+; SI: v_add_i32
+; SI: v_addc_u32
define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
index e7719b6feb3b..dfb181da1801 100644
--- a/test/CodeGen/R600/salu-to-valu.ll
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in vgprs. This
@@ -7,15 +7,15 @@
; sgpr register pair and use that for the pointer operand
; (low 64-bits of srsrc).
-; CHECK-LABEL: @mubuf
+; CHECK-LABEL: {{^}}mubuf:
-; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
-; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+; Make sure we aren't using VGPRs for the source operand of s_mov_b64
+; CHECK-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v
; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_*
; instructions
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #1
@@ -49,9 +49,9 @@ attributes #1 = { nounwind readnone }
; Test moving an SMRD instruction to the VALU
-; CHECK-LABEL: @smrd_valu
-; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]]
-; CHECK: BUFFER_STORE_DWORD [[OUT]]
+; CHECK-LABEL: {{^}}smrd_valu:
+; CHECK: buffer_load_dword [[OUT:v[0-9]+]]
+; CHECK: buffer_store_dword [[OUT]]
define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
entry:
@@ -77,8 +77,8 @@ endif:
; Test moving ann SMRD with an immediate offset to the VALU
-; CHECK-LABEL: @smrd_valu2
-; CHECK: BUFFER_LOAD_DWORD
+; CHECK-LABEL: {{^}}smrd_valu2:
+; CHECK: buffer_load_dword
define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -88,3 +88,31 @@ entry:
store i32 %3, i32 addrspace(1)* %out
ret void
}
+
+; CHECK-LABEL: {{^}}s_load_imm_v8i32:
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
+entry:
+ %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
+ %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
+ %tmp3 = load <8 x i32> addrspace(2)* %tmp2, align 4
+ store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
+ ret void
+}
+
+; CHECK-LABEL: {{^}}s_load_imm_v16i32:
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
+entry:
+ %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
+ %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
+ %tmp3 = load <16 x i32> addrspace(2)* %tmp2, align 4
+ store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
+ ret void
+}
diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/R600/scalar_to_vector.ll
index bcccb065818e..7ad964eb1623 100644
--- a/test/CodeGen/R600/scalar_to_vector.ll
+++ b/test/CodeGen/R600/scalar_to_vector.ll
@@ -1,14 +1,14 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @scalar_to_vector_v2i32
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}scalar_to_vector_v2i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: s_endpgm
define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tmp1 = load i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %tmp1 to <2 x i16>
@@ -17,14 +17,14 @@ define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(
ret void
}
-; FUNC-LABEL: @scalar_to_vector_v2f32
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}scalar_to_vector_v2f32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: s_endpgm
define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tmp1 = load float addrspace(1)* %in, align 4
%bc = bitcast float %tmp1 to <2 x i16>
diff --git a/test/CodeGen/R600/schedule-global-loads.ll b/test/CodeGen/R600/schedule-global-loads.ll
new file mode 100644
index 000000000000..b6437d25b8cb
--- /dev/null
+++ b/test/CodeGen/R600/schedule-global-loads.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FIXME: This currently doesn't do a great job of clustering the
+; loads, which end up with extra moves between them. Right now, it
+; seems the only things areLoadsFromSameBasePtr is accomplishing is
+; ordering the loads so that the lower address loads come first.
+
+; FUNC-LABEL: {{^}}cluster_global_arg_loads:
+; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
+; SI: buffer_store_dword [[REG0]]
+; SI: buffer_store_dword [[REG1]]
+define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
+ %load0 = load i32 addrspace(1)* %ptr, align 4
+ %gep = getelementptr i32 addrspace(1)* %ptr, i32 1
+ %load1 = load i32 addrspace(1)* %gep, align 4
+ store i32 %load0, i32 addrspace(1)* %out0, align 4
+ store i32 %load1, i32 addrspace(1)* %out1, align 4
+ ret void
+}
+
+; Test for a crach in SIInstrInfo::areLoadsFromSameBasePtr() when checking
+; an MUBUF load which does not have a vaddr operand.
+; FUNC-LABEL: {{^}}same_base_ptr_crash:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
+entry:
+ %out1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+ %tmp0 = load i32 addrspace(1)* %out
+ %tmp1 = load i32 addrspace(1)* %out1
+ %tmp2 = add i32 %tmp0, %tmp1
+ store i32 %tmp2, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/schedule-kernel-arg-loads.ll b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
new file mode 100644
index 000000000000..215ebfccf5b4
--- /dev/null
+++ b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+
+; FUNC-LABEL: {{^}}cluster_arg_loads:
+; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
+define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
+ store i32 %x, i32 addrspace(1)* %out0, align 4
+ store i32 %y, i32 addrspace(1)* %out1, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 3d2142d53ecf..dee90f311f15 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -1,11 +1,11 @@
; XFAIL: *
; REQUIRES: asserts
-; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
+; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
-; SI-LABEL: @main(
+; SI-LABEL: {{^}}main(
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
%0 = extractelement <4 x float> %reg1, i32 0
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
index e922d5c18680..c635c0569e84 100644
--- a/test/CodeGen/R600/sdiv.ll
+++ b/test/CodeGen/R600/sdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; The code generated by sdiv is long and complex and may frequently change.
@@ -10,7 +10,7 @@
; This was fixed by adding an additional pattern in R600Instructions.td to
; match this pattern with a CNDGE_INT.
-; FUNC-LABEL: @sdiv_i32
+; FUNC-LABEL: {{^}}sdiv_i32:
; EG: CF_END
define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
@@ -21,7 +21,7 @@ define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @sdiv_i32_4
+; FUNC-LABEL: {{^}}sdiv_i32_4:
define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%num = load i32 addrspace(1) * %in
%result = sdiv i32 %num, 4
@@ -32,16 +32,16 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; Multiply by a weird constant to make sure setIntDivIsCheap is
; working.
-; FUNC-LABEL: @slow_sdiv_i32_3435
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
-; SI: V_MUL_HI_I32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
-; SI: V_ADD_I32
-; SI: V_LSHRREV_B32
-; SI: V_ASHRREV_I32
-; SI: V_ADD_I32
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
+; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]]
+; SI: v_add_i32
+; SI: v_lshrrev_b32
+; SI: v_ashrrev_i32
+; SI: v_add_i32
+; SI: buffer_store_dword
+; SI: s_endpgm
define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%num = load i32 addrspace(1) * %in
%result = sdiv i32 %num, 3435
diff --git a/test/CodeGen/R600/sdivrem24.ll b/test/CodeGen/R600/sdivrem24.ll
new file mode 100644
index 000000000000..bb90343dbfe6
--- /dev/null
+++ b/test/CodeGen/R600/sdivrem24.ll
@@ -0,0 +1,238 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}sdiv24_i8:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+ %num = load i8 addrspace(1) * %in
+ %den = load i8 addrspace(1) * %den_ptr
+ %result = sdiv i8 %num, %den
+ store i8 %result, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sdiv24_i16:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+ %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+ %num = load i16 addrspace(1) * %in, align 2
+ %den = load i16 addrspace(1) * %den_ptr, align 2
+ %result = sdiv i16 %num, %den
+ store i16 %result, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sdiv24_i32:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = ashr i32 %num.i24.0, 8
+ %den.i24 = ashr i32 %den.i24.0, 8
+ %result = sdiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sdiv25_i32:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = ashr i32 %num.i24.0, 7
+ %den.i24 = ashr i32 %den.i24.0, 7
+ %result = sdiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = ashr i32 %num.i24.0, 8
+ %den.i24 = ashr i32 %den.i24.0, 7
+ %result = sdiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = ashr i32 %num.i24.0, 7
+ %den.i24 = ashr i32 %den.i24.0, 8
+ %result = sdiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srem24_i8:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+ %num = load i8 addrspace(1) * %in
+ %den = load i8 addrspace(1) * %den_ptr
+ %result = srem i8 %num, %den
+ store i8 %result, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srem24_i16:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+ %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+ %num = load i16 addrspace(1) * %in, align 2
+ %den = load i16 addrspace(1) * %den_ptr, align 2
+ %result = srem i16 %num, %den
+ store i16 %result, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srem24_i32:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = ashr i32 %num.i24.0, 8
+ %den.i24 = ashr i32 %den.i24.0, 8
+ %result = srem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}srem25_i32:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = ashr i32 %num.i24.0, 7
+ %den.i24 = ashr i32 %den.i24.0, 7
+ %result = srem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_srem24_i32_1:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = ashr i32 %num.i24.0, 8
+ %den.i24 = ashr i32 %den.i24.0, 7
+ %result = srem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_srem24_i32_2:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = ashr i32 %num.i24.0, 7
+ %den.i24 = ashr i32 %den.i24.0, 8
+ %result = srem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/select-i1.ll b/test/CodeGen/R600/select-i1.ll
index 009dd7f68dea..bb778dd264d2 100644
--- a/test/CodeGen/R600/select-i1.ll
+++ b/test/CodeGen/R600/select-i1.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FIXME: This should go in existing select.ll test, except the current testcase there is broken on SI
-; FUNC-LABEL: @select_i1
-; SI: V_CNDMASK_B32
-; SI-NOT: V_CNDMASK_B32
+; FUNC-LABEL: {{^}}select_i1:
+; SI: v_cndmask_b32
+; SI-NOT: v_cndmask_b32
define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i1 %a, i1 %b
diff --git a/test/CodeGen/R600/select-vectors.ll b/test/CodeGen/R600/select-vectors.ll
index 94605fe08ad8..d982603335f2 100644
--- a/test/CodeGen/R600/select-vectors.ll
+++ b/test/CodeGen/R600/select-vectors.ll
@@ -1,14 +1,14 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; Test expansion of scalar selects on vectors.
; Evergreen not enabled since it seems to be having problems with doubles.
-; FUNC-LABEL: @select_v4i8
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v4i8:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
%cmp = icmp eq i8 %c, 0
%select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
@@ -16,11 +16,11 @@ define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b,
ret void
}
-; FUNC-LABEL: @select_v4i16
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v4i16:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
@@ -28,10 +28,10 @@ define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16>
ret void
}
-; FUNC-LABEL: @select_v2i32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}select_v2i32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_dwordx2
define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
@@ -39,12 +39,12 @@ define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32>
ret void
}
-; FUNC-LABEL: @select_v4i32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}select_v4i32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_dwordx4
define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
@@ -52,15 +52,15 @@ define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32>
ret void
}
-; FUNC-LABEL: @select_v8i32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v8i32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
@@ -68,8 +68,8 @@ define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32>
ret void
}
-; FUNC-LABEL: @select_v2f32
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}select_v2f32:
+; SI: buffer_store_dwordx2
define void @select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x float> %a, <2 x float> %b
@@ -77,8 +77,8 @@ define void @select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x f
ret void
}
-; FUNC-LABEL: @select_v4f32
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}select_v4f32:
+; SI: buffer_store_dwordx4
define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x float> %a, <4 x float> %b
@@ -86,15 +86,15 @@ define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x f
ret void
}
-; FUNC-LABEL: @select_v8f32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v8f32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x float> %a, <8 x float> %b
@@ -102,11 +102,11 @@ define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x f
ret void
}
-; FUNC-LABEL: @select_v2f64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v2f64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x double> %a, <2 x double> %b
@@ -114,15 +114,15 @@ define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x
ret void
}
-; FUNC-LABEL: @select_v4f64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v4f64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x double> %a, <4 x double> %b
@@ -130,23 +130,23 @@ define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x
ret void
}
-; FUNC-LABEL: @select_v8f64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v8f64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x double> %a, <8 x double> %b
diff --git a/test/CodeGen/R600/select.ll b/test/CodeGen/R600/select.ll
index 7d5156834b9d..45f3cd5a7ac5 100644
--- a/test/CodeGen/R600/select.ll
+++ b/test/CodeGen/R600/select.ll
@@ -7,7 +7,7 @@
; In order to avoid the select_cc optimization, this test case calculates the
; condition for the select in a separate basic block.
-; FUNC-LABEL: @select
+; FUNC-LABEL: {{^}}select:
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll
index dba25e3bd21e..f48ec2135de4 100644
--- a/test/CodeGen/R600/select64.ll
+++ b/test/CodeGen/R600/select64.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; CHECK-LABEL: @select0
+; CHECK-LABEL: {{^}}select0:
; i64 select should be split into two i32 selects, and we shouldn't need
; to use a shfit to extract the hi dword of the input.
-; CHECK-NOT: S_LSHR_B64
-; CHECK: V_CNDMASK
-; CHECK: V_CNDMASK
+; CHECK-NOT: s_lshr_b64
+; CHECK: v_cndmask
+; CHECK: v_cndmask
define void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
entry:
%0 = icmp ugt i32 %cond, 5
@@ -14,9 +14,9 @@ entry:
ret void
}
-; CHECK-LABEL: @select_trunc_i64
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK-LABEL: {{^}}select_trunc_i64:
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i64 0, i64 %in
@@ -25,9 +25,9 @@ define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwi
ret void
}
-; CHECK-LABEL: @select_trunc_i64_2
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK-LABEL: {{^}}select_trunc_i64_2:
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
%cmp = icmp ugt i32 %cond, 5
%sel = select i1 %cmp, i64 %a, i64 %b
@@ -36,9 +36,9 @@ define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %
ret void
}
-; CHECK-LABEL: @v_select_trunc_i64_2
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK-LABEL: {{^}}v_select_trunc_i64_2:
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
%a = load i64 addrspace(1)* %aptr, align 8
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
index bdb6867850ba..feaea87592b6 100644
--- a/test/CodeGen/R600/selectcc-opt.ll
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FUNC-LABEL: @test_a
+; FUNC-LABEL: {{^}}test_a:
; EG-NOT: CND
; EG: SET{{[NEQGTL]+}}_DX10
@@ -30,7 +30,7 @@ ENDIF:
; Same as test_a, but the branch labels are swapped to produce the inverse cc
; for the icmp instruction
-; EG-LABEL: @test_b
+; EG-LABEL: {{^}}test_b:
; EG: SET{{[GTEQN]+}}_DX10
; EG-NEXT: PRED_
; EG-NEXT: ALU clause starting
@@ -56,7 +56,7 @@ ENDIF:
}
; Test a CND*_INT instruction with float true/false values
-; EG-LABEL: @test_c
+; EG-LABEL: {{^}}test_c:
; EG: CND{{[GTE]+}}_INT
define void @test_c(float addrspace(1)* %out, i32 %in) {
entry:
@@ -66,11 +66,11 @@ entry:
ret void
}
-; FUNC-LABEL: @selectcc_bool
-; SI: V_CMP_NE_I32
-; SI-NEXT: V_CNDMASK_B32_e64
-; SI-NOT: CMP
-; SI-NOT: CNDMASK
+; FUNC-LABEL: {{^}}selectcc_bool:
+; SI: v_cmp_ne_i32
+; SI-NEXT: v_cndmask_b32_e64
+; SI-NOT: cmp
+; SI-NOT: cndmask
define void @selectcc_bool(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = select i1 %icmp0, i32 -1, i32 0
diff --git a/test/CodeGen/R600/selectcc.ll b/test/CodeGen/R600/selectcc.ll
index a8f57cf1b572..d0ae5bfe91ae 100644
--- a/test/CodeGen/R600/selectcc.ll
+++ b/test/CodeGen/R600/selectcc.ll
@@ -1,15 +1,15 @@
; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @selectcc_i64
+; FUNC-LABEL: {{^}}selectcc_i64:
; EG: XOR_INT
; EG: XOR_INT
; EG: OR_INT
; EG: CNDE_INT
; EG: CNDE_INT
-; SI: V_CMP_EQ_I64
-; SI: V_CNDMASK
-; SI: V_CNDMASK
+; SI: v_cmp_eq_i64
+; SI: v_cndmask
+; SI: v_cndmask
define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
entry:
%0 = icmp eq i64 %lhs, %rhs
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
index 5c7d4998d07c..53694dcffa66 100644
--- a/test/CodeGen/R600/set-dx10.ll
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -4,7 +4,7 @@
; to store integer true (-1) and false (0) values are lowered to one of the
; SET*DX10 instructions.
-; CHECK: @fcmp_une_select_fptosi
+; CHECK: {{^}}fcmp_une_select_fptosi:
; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -18,7 +18,7 @@ entry:
ret void
}
-; CHECK: @fcmp_une_select_i32
+; CHECK: {{^}}fcmp_une_select_i32:
; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -30,7 +30,7 @@ entry:
ret void
}
-; CHECK: @fcmp_oeq_select_fptosi
+; CHECK: {{^}}fcmp_oeq_select_fptosi:
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -44,7 +44,7 @@ entry:
ret void
}
-; CHECK: @fcmp_oeq_select_i32
+; CHECK: {{^}}fcmp_oeq_select_i32:
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -56,7 +56,7 @@ entry:
ret void
}
-; CHECK: @fcmp_ogt_select_fptosi
+; CHECK: {{^}}fcmp_ogt_select_fptosi:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -70,7 +70,7 @@ entry:
ret void
}
-; CHECK: @fcmp_ogt_select_i32
+; CHECK: {{^}}fcmp_ogt_select_i32:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -82,7 +82,7 @@ entry:
ret void
}
-; CHECK: @fcmp_oge_select_fptosi
+; CHECK: {{^}}fcmp_oge_select_fptosi:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -96,7 +96,7 @@ entry:
ret void
}
-; CHECK: @fcmp_oge_select_i32
+; CHECK: {{^}}fcmp_oge_select_i32:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -108,7 +108,7 @@ entry:
ret void
}
-; CHECK: @fcmp_ole_select_fptosi
+; CHECK: {{^}}fcmp_ole_select_fptosi:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -122,7 +122,7 @@ entry:
ret void
}
-; CHECK: @fcmp_ole_select_i32
+; CHECK: {{^}}fcmp_ole_select_i32:
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -134,7 +134,7 @@ entry:
ret void
}
-; CHECK: @fcmp_olt_select_fptosi
+; CHECK: {{^}}fcmp_olt_select_fptosi:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -148,7 +148,7 @@ entry:
ret void
}
-; CHECK: @fcmp_olt_select_i32
+; CHECK: {{^}}fcmp_olt_select_i32:
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00)
diff --git a/test/CodeGen/R600/setcc-equivalent.ll b/test/CodeGen/R600/setcc-equivalent.ll
index f796748fcefe..11ea793650c4 100644
--- a/test/CodeGen/R600/setcc-equivalent.ll
+++ b/test/CodeGen/R600/setcc-equivalent.ll
@@ -1,7 +1,6 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
-; XFAIL: *
-; EG-LABEL: @and_setcc_setcc_i32
+; EG-LABEL: {{^}}and_setcc_setcc_i32:
; EG: AND_INT
; EG-NEXT: SETE_INT
define void @and_setcc_setcc_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
@@ -13,7 +12,7 @@ define void @and_setcc_setcc_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
ret void
}
-; EG-LABEL: @and_setcc_setcc_v4i32
+; EG-LABEL: {{^}}and_setcc_setcc_v4i32:
; EG: AND_INT
; EG: AND_INT
; EG: SETE_INT
@@ -28,4 +27,4 @@ define void @and_setcc_setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <
%ext = sext <4 x i1> %and to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 4
ret void
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/R600/setcc-opt.ll b/test/CodeGen/R600/setcc-opt.ll
index 8e831e409191..a44c89f72cf5 100644
--- a/test/CodeGen/R600/setcc-opt.ll
+++ b/test/CodeGen/R600/setcc-opt.ll
@@ -1,15 +1,233 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; SI-LABEL: @sext_bool_icmp_ne
-; SI: V_CMP_NE_I32
-; SI-NEXT: V_CNDMASK_B32
-; SI-NOT: V_CMP_NE_I32
-; SI-NOT: V_CNDMASK_B32
-; SI: S_ENDPGM
-define void @sext_bool_icmp_ne(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT:buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+
+; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
+; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
+define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp eq i32 %a, %b
+ %ext = sext i1 %icmp0 to i32
+ %icmp1 = icmp eq i32 %ext, 0
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+
+; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
+; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
+define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
%icmp1 = icmp ne i32 %ext, 0
store i1 %icmp1, i1 addrspace(1)* %out
ret void
}
+
+; This really folds away to false
+; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; SI-NEXT: buffer_store_byte [[TMP]]
+; SI-NEXT: s_endpgm
+define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp eq i32 %a, %b
+ %ext = sext i1 %icmp0 to i32
+ %icmp1 = icmp eq i32 %ext, 1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; This really folds away to true
+; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; SI-NEXT: buffer_store_byte [[TMP]]
+; SI-NEXT: s_endpgm
+define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = sext i1 %icmp0 to i32
+ %icmp1 = icmp ne i32 %ext, 1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp eq i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp eq i32 %ext, 0
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp ne i32 %ext, 0
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
+; SI-NOT: v_cmp
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp eq i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp eq i32 %ext, 1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
+; SI-NOT: v_cmp
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp ne i32 %ext, 1
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
+; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; SI: buffer_store_byte
+; SI: s_endpgm
+define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = sext i1 %icmp0 to i32
+ %icmp1 = icmp ne i32 %ext, 2
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
+; SI: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; SI: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
+; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
+ %b.ext = zext i8 %b to i32
+ %icmp0 = icmp ne i32 %b.ext, 255
+ store i1 %icmp0, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
+; SI: buffer_load_sbyte [[B:v[0-9]+]]
+; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
+ %b = load i8 addrspace(1)* %b.ptr
+ %b.ext = sext i8 %b to i32
+ %icmp0 = icmp ne i32 %b.ext, -1
+ store i1 %icmp0, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
+; SI: s_load_dword [[B:s[0-9]+]]
+; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
+ %b.ext = sext i8 %b to i32
+ %icmp0 = icmp ne i32 %b.ext, -1
+ store i1 %icmp0, i1 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: This ends up doing a buffer_load_ubyte, and and compare to
+; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
+; Should do a buffer_load_sbyte and compare with -1
+
+; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
+; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
+; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
+ %b.ext = sext i8 %b to i32
+ %icmp0 = icmp ne i32 %b.ext, -1
+ store i1 %icmp0, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
+ %b.ext = zext i8 %b to i32
+ %icmp0 = icmp ne i32 %b.ext, -1
+ store i1 %icmp0, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp ne i32 %ext, 2
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %icmp0 = icmp ne i32 %a, %b
+ %ext = zext i1 %icmp0 to i32
+ %icmp1 = icmp eq i32 %ext, 2
+ store i1 %icmp1, i1 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index 5bd95b79c0f0..f9c7e4f36128 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -1,7 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @setcc_v2i32
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: {{^}}setcc_v2i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y
@@ -12,7 +14,7 @@ define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %
ret void
}
-; FUNC-LABEL: @setcc_v4i32
+; FUNC-LABEL: {{^}}setcc_v4i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -32,9 +34,9 @@ define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %
;; Float comparisons
;;;==========================================================================;;;
-; FUNC-LABEL: @f32_oeq
+; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
-; SI: V_CMP_EQ_F32
+; SI: v_cmp_eq_f32
define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp oeq float %a, %b
@@ -43,9 +45,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_ogt
+; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
-; SI: V_CMP_GT_F32
+; SI: v_cmp_gt_f32
define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ogt float %a, %b
@@ -54,9 +56,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_oge
+; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
-; SI: V_CMP_GE_F32
+; SI: v_cmp_ge_f32
define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp oge float %a, %b
@@ -65,9 +67,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_olt
+; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
-; SI: V_CMP_LT_F32
+; SI: v_cmp_lt_f32
define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp olt float %a, %b
@@ -76,9 +78,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_ole
+; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
-; SI: V_CMP_LE_F32
+; SI: v_cmp_le_f32
define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ole float %a, %b
@@ -87,18 +89,16 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_one
+; FUNC-LABEL: {{^}}f32_one:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
-; SI: V_CMP_O_F32
-; SI: V_CMP_NEQ_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32
+
+; SI: v_cmp_lg_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp one float %a, %b
@@ -107,12 +107,12 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_ord
+; FUNC-LABEL: {{^}}f32_ord:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
-; SI: V_CMP_O_F32
+; SI: v_cmp_o_f32
define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ord float %a, %b
@@ -121,18 +121,16 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_ueq
+; FUNC-LABEL: {{^}}f32_ueq:
; R600-DAG: SETNE_DX10
; R600-DAG: SETNE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETNE_INT
-; SI: V_CMP_U_F32
-; SI: V_CMP_EQ_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_nlg_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ueq float %a, %b
@@ -141,14 +139,11 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_ugt
+; FUNC-LABEL: {{^}}f32_ugt:
; R600: SETGE
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_GT_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_nle_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ugt float %a, %b
@@ -157,14 +152,12 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_uge
+; FUNC-LABEL: {{^}}f32_uge:
; R600: SETGT
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_GE_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_nlt_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp uge float %a, %b
@@ -173,14 +166,12 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_ult
+; FUNC-LABEL: {{^}}f32_ult:
; R600: SETGE
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_LT_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_nge_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ult float %a, %b
@@ -189,14 +180,12 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_ule
+; FUNC-LABEL: {{^}}f32_ule:
; R600: SETGT
; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_LE_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_ngt_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ule float %a, %b
@@ -205,9 +194,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_une
+; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
-; SI: V_CMP_NEQ_F32
+; SI: v_cmp_neq_f32
define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp une float %a, %b
@@ -216,12 +205,12 @@ entry:
ret void
}
-; FUNC-LABEL: @f32_uno
+; FUNC-LABEL: {{^}}f32_uno:
; R600: SETNE_DX10
; R600: SETNE_DX10
; R600: OR_INT
; R600: SETNE_INT
-; SI: V_CMP_U_F32
+; SI: v_cmp_u_f32
define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp uno float %a, %b
@@ -234,9 +223,9 @@ entry:
;; 32-bit integer comparisons
;;;==========================================================================;;;
-; FUNC-LABEL: @i32_eq
+; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
-; SI: V_CMP_EQ_I32
+; SI: v_cmp_eq_i32
define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp eq i32 %a, %b
@@ -245,9 +234,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_ne
+; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
-; SI: V_CMP_NE_I32
+; SI: v_cmp_ne_i32
define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ne i32 %a, %b
@@ -256,9 +245,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_ugt
+; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
-; SI: V_CMP_GT_U32
+; SI: v_cmp_gt_u32
define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ugt i32 %a, %b
@@ -267,9 +256,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_uge
+; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
-; SI: V_CMP_GE_U32
+; SI: v_cmp_ge_u32
define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp uge i32 %a, %b
@@ -278,9 +267,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_ult
+; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
-; SI: V_CMP_LT_U32
+; SI: v_cmp_lt_u32
define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ult i32 %a, %b
@@ -289,9 +278,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_ule
+; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
-; SI: V_CMP_LE_U32
+; SI: v_cmp_le_u32
define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp ule i32 %a, %b
@@ -300,9 +289,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_sgt
+; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
-; SI: V_CMP_GT_I32
+; SI: v_cmp_gt_i32
define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp sgt i32 %a, %b
@@ -311,9 +300,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_sge
+; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
-; SI: V_CMP_GE_I32
+; SI: v_cmp_ge_i32
define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp sge i32 %a, %b
@@ -322,9 +311,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_slt
+; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
-; SI: V_CMP_LT_I32
+; SI: v_cmp_lt_i32
define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp slt i32 %a, %b
@@ -333,9 +322,9 @@ entry:
ret void
}
-; FUNC-LABEL: @i32_sle
+; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
-; SI: V_CMP_LE_I32
+; SI: v_cmp_le_i32
define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp sle i32 %a, %b
@@ -343,3 +332,46 @@ entry:
store i32 %1, i32 addrspace(1)* %out
ret void
}
+
+; FIXME: This does 4 compares
+; FUNC-LABEL: {{^}}v3i32_eq:
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI: s_endpgm
+define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.a = getelementptr <3 x i32> addrspace(1)* %ptra, i32 %tid
+ %gep.b = getelementptr <3 x i32> addrspace(1)* %ptrb, i32 %tid
+ %gep.out = getelementptr <3 x i32> addrspace(1)* %out, i32 %tid
+ %a = load <3 x i32> addrspace(1)* %gep.a
+ %b = load <3 x i32> addrspace(1)* %gep.b
+ %cmp = icmp eq <3 x i32> %a, %b
+ %ext = sext <3 x i1> %cmp to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v3i8_eq:
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI: s_endpgm
+define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.a = getelementptr <3 x i8> addrspace(1)* %ptra, i32 %tid
+ %gep.b = getelementptr <3 x i8> addrspace(1)* %ptrb, i32 %tid
+ %gep.out = getelementptr <3 x i8> addrspace(1)* %out, i32 %tid
+ %a = load <3 x i8> addrspace(1)* %gep.a
+ %b = load <3 x i8> addrspace(1)* %gep.b
+ %cmp = icmp eq <3 x i8> %a, %b
+ %ext = sext <3 x i1> %cmp to <3 x i8>
+ store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
+ ret void
+}
diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll
index 54a33b30940a..c0632198efdc 100644
--- a/test/CodeGen/R600/setcc64.ll
+++ b/test/CodeGen/R600/setcc64.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; XXX: Merge this into setcc, once R600 supports 64-bit operations
@@ -6,8 +6,8 @@
;; Double comparisons
;;;==========================================================================;;;
-; FUNC-LABEL: @f64_oeq
-; SI: V_CMP_EQ_F64
+; FUNC-LABEL: {{^}}f64_oeq:
+; SI: v_cmp_eq_f64
define void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp oeq double %a, %b
@@ -16,8 +16,8 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_ogt
-; SI: V_CMP_GT_F64
+; FUNC-LABEL: {{^}}f64_ogt:
+; SI: v_cmp_gt_f64
define void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ogt double %a, %b
@@ -26,8 +26,8 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_oge
-; SI: V_CMP_GE_F64
+; FUNC-LABEL: {{^}}f64_oge:
+; SI: v_cmp_ge_f64
define void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp oge double %a, %b
@@ -36,8 +36,8 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_olt
-; SI: V_CMP_LT_F64
+; FUNC-LABEL: {{^}}f64_olt:
+; SI: v_cmp_lt_f64
define void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp olt double %a, %b
@@ -46,8 +46,8 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_ole
-; SI: V_CMP_LE_F64
+; FUNC-LABEL: {{^}}f64_ole:
+; SI: v_cmp_le_f64
define void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ole double %a, %b
@@ -56,12 +56,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_one
-; SI: V_CMP_O_F64
-; SI: V_CMP_NEQ_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32
+; FUNC-LABEL: {{^}}f64_one:
+; SI: v_cmp_lg_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp one double %a, %b
@@ -70,8 +67,8 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_ord
-; SI: V_CMP_O_F64
+; FUNC-LABEL: {{^}}f64_ord:
+; SI: v_cmp_o_f64
define void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ord double %a, %b
@@ -80,12 +77,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_ueq
-; SI: V_CMP_U_F64
-; SI: V_CMP_EQ_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ueq:
+; SI: v_cmp_nlg_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ueq double %a, %b
@@ -94,12 +88,10 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_ugt
-; SI: V_CMP_U_F64
-; SI: V_CMP_GT_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ugt:
+
+; SI: v_cmp_nle_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ugt double %a, %b
@@ -108,12 +100,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_uge
-; SI: V_CMP_U_F64
-; SI: V_CMP_GE_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_uge:
+; SI: v_cmp_nlt_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp uge double %a, %b
@@ -122,12 +111,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_ult
-; SI: V_CMP_U_F64
-; SI: V_CMP_LT_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ult:
+; SI: v_cmp_nge_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ult double %a, %b
@@ -136,12 +122,9 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_ule
-; SI: V_CMP_U_F64
-; SI: V_CMP_LE_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ule:
+; SI: v_cmp_ngt_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ule double %a, %b
@@ -150,8 +133,8 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_une
-; SI: V_CMP_NEQ_F64
+; FUNC-LABEL: {{^}}f64_une:
+; SI: v_cmp_neq_f64
define void @f64_une(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp une double %a, %b
@@ -160,8 +143,8 @@ entry:
ret void
}
-; FUNC-LABEL: @f64_uno
-; SI: V_CMP_U_F64
+; FUNC-LABEL: {{^}}f64_uno:
+; SI: v_cmp_u_f64
define void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp uno double %a, %b
@@ -174,8 +157,8 @@ entry:
;; 64-bit integer comparisons
;;;==========================================================================;;;
-; FUNC-LABEL: @i64_eq
-; SI: V_CMP_EQ_I64
+; FUNC-LABEL: {{^}}i64_eq:
+; SI: v_cmp_eq_i64
define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp eq i64 %a, %b
@@ -184,8 +167,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_ne
-; SI: V_CMP_NE_I64
+; FUNC-LABEL: {{^}}i64_ne:
+; SI: v_cmp_ne_i64
define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ne i64 %a, %b
@@ -194,8 +177,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_ugt
-; SI: V_CMP_GT_U64
+; FUNC-LABEL: {{^}}i64_ugt:
+; SI: v_cmp_gt_u64
define void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ugt i64 %a, %b
@@ -204,8 +187,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_uge
-; SI: V_CMP_GE_U64
+; FUNC-LABEL: {{^}}i64_uge:
+; SI: v_cmp_ge_u64
define void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp uge i64 %a, %b
@@ -214,8 +197,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_ult
-; SI: V_CMP_LT_U64
+; FUNC-LABEL: {{^}}i64_ult:
+; SI: v_cmp_lt_u64
define void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ult i64 %a, %b
@@ -224,8 +207,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_ule
-; SI: V_CMP_LE_U64
+; FUNC-LABEL: {{^}}i64_ule:
+; SI: v_cmp_le_u64
define void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp ule i64 %a, %b
@@ -234,8 +217,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_sgt
-; SI: V_CMP_GT_I64
+; FUNC-LABEL: {{^}}i64_sgt:
+; SI: v_cmp_gt_i64
define void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp sgt i64 %a, %b
@@ -244,8 +227,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_sge
-; SI: V_CMP_GE_I64
+; FUNC-LABEL: {{^}}i64_sge:
+; SI: v_cmp_ge_i64
define void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp sge i64 %a, %b
@@ -254,8 +237,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_slt
-; SI: V_CMP_LT_I64
+; FUNC-LABEL: {{^}}i64_slt:
+; SI: v_cmp_lt_i64
define void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp slt i64 %a, %b
@@ -264,8 +247,8 @@ entry:
ret void
}
-; FUNC-LABEL: @i64_sle
-; SI: V_CMP_LE_I64
+; FUNC-LABEL: {{^}}i64_sle:
+; SI: v_cmp_le_i64
define void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = icmp sle i64 %a, %b
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index cc942c10a91e..c6265a451c5f 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: {{^}}main:
+; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp oeq float %p, %p
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index 33007fc754b8..f2113096e3a7 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: {{^}}main:
+; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
define void @main(float %p) {
main_body:
%c = fcmp une float %p, %p
diff --git a/test/CodeGen/R600/sext-eliminate.ll b/test/CodeGen/R600/sext-eliminate.ll
new file mode 100644
index 000000000000..7dc6eb87f6b5
--- /dev/null
+++ b/test/CodeGen/R600/sext-eliminate.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}sext_in_reg_i1_i32_add:
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: SUB_INT {{[* ]*}}[[RES]]
+; EG-NOT: BFE
+define void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+ %sext = sext i1 %a to i32
+ %res = add i32 %b, %sext
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sext_in_reg_i1_i32_sub:
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT {{[* ]*}}[[RES]]
+; EG-NOT: BFE
+define void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+ %sext = sext i1 %a to i32
+ %res = sub i32 %b, %sext
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index 1b02e4bf8015..3260179921f9 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -1,14 +1,15 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @sext_in_reg_i1_i32
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
-; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
+; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
+; SI: buffer_store_dword [[EXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
@@ -20,11 +21,11 @@ define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
ret void
}
-; FUNC-LABEL: @sext_in_reg_i8_to_i32
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
@@ -38,11 +39,11 @@ define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounw
ret void
}
-; FUNC-LABEL: @sext_in_reg_i16_to_i32
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
@@ -56,11 +57,11 @@ define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) noun
ret void
}
-; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
@@ -74,29 +75,31 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
ret void
}
-; FUNC-LABEL: @sext_in_reg_i1_to_i64
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = add i64 %a, %b
+ %c = shl i64 %a, %b
%shl = shl i64 %c, 63
%ashr = ashr i64 %shl, 63
store i64 %ashr, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @sext_in_reg_i8_to_i64
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: LSHL
+; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
@@ -104,23 +107,24 @@ define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = add i64 %a, %b
+ %c = shl i64 %a, %b
%shl = shl i64 %c, 56
%ashr = ashr i64 %shl, 56
store i64 %ashr, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @sext_in_reg_i16_to_i64
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: LSHL
+; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
@@ -128,32 +132,32 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = add i64 %a, %b
+ %c = shl i64 %a, %b
%shl = shl i64 %c, 48
%ashr = ashr i64 %shl, 48
store i64 %ashr, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: @sext_in_reg_i32_to_i64
-; SI: S_LOAD_DWORD
-; SI: S_LOAD_DWORD
-; SI: S_ADD_I32 [[ADD:s[0-9]+]],
-; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NOT: BFE_INT
-; EG: ADD_INT {{\*?}} [[RES_LO]]
+
; EG: ASHR [[RES_HI]]
-; EG: ADD_INT
+
; EG: LSHR
; EG: LSHR
;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
- %c = add i64 %a, %b
+ %c = shl i64 %a, %b
%shl = shl i64 %c, 32
%ashr = ashr i64 %shl, 32
store i64 %ashr, i64 addrspace(1)* %out, align 8
@@ -161,10 +165,10 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
}
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
-; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
-; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
-; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
-; XSI: BUFFER_STORE_DWORD
+; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
+; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
+; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
+; XSI: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
@@ -175,10 +179,93 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; ret void
; }
-; FUNC-LABEL: @sext_in_reg_i1_in_i32_other_amount
-; SI-NOT: BFE
-; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
-; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+ %a = load i64 addrspace(1)* %a.gep, align 8
+ %b = load i64 addrspace(1)* %b.gep, align 8
+
+ %c = shl i64 %a, %b
+ %shl = shl i64 %c, 63
+ %ashr = ashr i64 %shl, 63
+ store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+ %a = load i64 addrspace(1)* %a.gep, align 8
+ %b = load i64 addrspace(1)* %b.gep, align 8
+
+ %c = shl i64 %a, %b
+ %shl = shl i64 %c, 56
+ %ashr = ashr i64 %shl, 56
+ store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+ %a = load i64 addrspace(1)* %a.gep, align 8
+ %b = load i64 addrspace(1)* %b.gep, align 8
+
+ %c = shl i64 %a, %b
+ %shl = shl i64 %c, 48
+ %ashr = ashr i64 %shl, 48
+ store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
+define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+ %a = load i64 addrspace(1)* %a.gep, align 8
+ %b = load i64 addrspace(1)* %b.gep, align 8
+
+ %c = shl i64 %a, %b
+ %shl = shl i64 %c, 32
+ %ashr = ashr i64 %shl, 32
+ store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_lshl_b32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
+; SI: s_ashr_i32 {{s[0-9]+}}, [[REG]], 7
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
@@ -194,11 +281,12 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a,
ret void
}
-; FUNC-LABEL: @sext_in_reg_v2i1_in_v2i32_other_amount
-; SI: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
-; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
-; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
-; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7
+; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
+; SI-DAG: s_lshl_b32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
+; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG0]], 7
+; SI-DAG: s_lshl_b32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
+; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG1]], 7
+; SI: s_endpgm
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
@@ -217,10 +305,10 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out
}
-; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
@@ -234,12 +322,12 @@ define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
ret void
}
-; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: buffer_store_dwordx4
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
@@ -255,10 +343,10 @@ define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
ret void
}
-; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
@@ -272,12 +360,12 @@ define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
ret void
}
-; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx4
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
@@ -293,10 +381,10 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
ret void
}
-; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
-; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
+; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx2
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
@@ -310,7 +398,7 @@ define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32>
ret void
}
-; FUNC-LABEL: @testcase
+; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
@@ -322,7 +410,7 @@ define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
ret void
}
-; FUNC-LABEL: @testcase_3
+; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
@@ -334,11 +422,11 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
ret void
}
-; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
%loada = load <4 x i32> addrspace(1)* %a, align 16
%loadb = load <4 x i32> addrspace(1)* %b, align 16
@@ -349,9 +437,9 @@ define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
ret void
}
-; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
%loada = load <4 x i32> addrspace(1)* %a, align 16
%loadb = load <4 x i32> addrspace(1)* %b, align 16
@@ -365,11 +453,11 @@ define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x
; FIXME: The BFE should really be eliminated. I think it should happen
; when computeKnownBitsForTargetNode is implemented for imax.
-; FUNC-LABEL: @sext_in_reg_to_illegal_type
-; SI: BUFFER_LOAD_SBYTE
-; SI: V_MAX_I32
-; SI: V_BFE_I32
-; SI: BUFFER_STORE_SHORT
+; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
+; SI: buffer_load_sbyte
+; SI: v_max_i32
+; SI: v_bfe_i32
+; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = sext i8 %tmp5 to i32
@@ -382,9 +470,9 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
-; FUNC-LABEL: @bfe_0_width
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_0_width:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
@@ -392,10 +480,10 @@ define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
ret void
}
-; FUNC-LABEL: @bfe_8_bfe_8
-; SI: V_BFE_I32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_8_bfe_8:
+; SI: v_bfe_i32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
@@ -404,9 +492,9 @@ define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
ret void
}
-; FUNC-LABEL: @bfe_8_bfe_16
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_8_bfe_16:
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
@@ -416,10 +504,10 @@ define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
}
; This really should be folded into 1
-; FUNC-LABEL: @bfe_16_bfe_8
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_16_bfe_8:
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
%load = load i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
@@ -429,10 +517,10 @@ define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
}
; Make sure there isn't a redundant BFE
-; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
-; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
+; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
@@ -442,7 +530,7 @@ define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) n
ret void
}
-; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
@@ -452,10 +540,10 @@ define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32
ret void
}
-; FUNC-LABEL: @sextload_i8_to_i32_bfe
-; SI: BUFFER_LOAD_SBYTE
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
+; SI: buffer_load_sbyte
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
%load = load i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
@@ -466,9 +554,10 @@ define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %pt
ret void
}
-; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
+; SI: .text
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
%load = load i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
@@ -479,11 +568,11 @@ define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %
ret void
}
-; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
-; SI-NOT: SHR
-; SI-NOT: SHL
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
+; SI-NOT: shr
+; SI-NOT: shl
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
@@ -493,12 +582,12 @@ define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1
ret void
}
-; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
-; SI: BUFFER_LOAD_DWORD
-; SI-NOT: SHL
-; SI-NOT: SHR
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
+; SI: buffer_load_dword
+; SI-NOT: shl
+; SI-NOT: shr
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
+; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
@@ -508,12 +597,12 @@ define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1
ret void
}
-; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
+; SI: buffer_load_dword
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
+; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%x = load i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll
index 06ad24d959cf..f0236acc6daa 100644
--- a/test/CodeGen/R600/sgpr-control-flow.ll
+++ b/test/CodeGen/R600/sgpr-control-flow.ll
@@ -1,12 +1,17 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
;
;
; Most SALU instructions ignore control flow, so we need to make sure
; they don't overwrite values from other blocks.
-; SI-NOT: S_ADD
+; If the branch decision is made based on a value in an SGPR then all
+; threads will execute the same code paths, so we don't need to worry
+; about instructions in different blocks overwriting each other.
+; SI-LABEL: {{^}}sgpr_if_else_salu_br:
+; SI: s_add
+; SI: s_add
-define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
%0 = icmp eq i32 %a, 0
br i1 %0, label %if, label %else
@@ -25,3 +30,76 @@ endif:
store i32 %4, i32 addrspace(1)* %out
ret void
}
+
+; The two S_ADD instructions should write to different registers, since
+; different threads will take different control flow paths.
+
+; SI-LABEL: {{^}}sgpr_if_else_valu_br:
+; SI: s_add_i32 [[SGPR:s[0-9]+]]
+; SI-NOT: s_add_i32 [[SGPR]]
+
+define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %tid_f = uitofp i32 %tid to float
+ %tmp1 = fcmp ueq float %tid_f, 0.0
+ br i1 %tmp1, label %if, label %else
+
+if:
+ %tmp2 = add i32 %b, %c
+ br label %endif
+
+else:
+ %tmp3 = add i32 %d, %e
+ br label %endif
+
+endif:
+ %tmp4 = phi i32 [%tmp2, %if], [%tmp3, %else]
+ store i32 %tmp4, i32 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: Should write to different SGPR pairs instead of copying to
+; VALU for i1 phi.
+
+; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br:
+; SI: buffer_load_dword [[AVAL:v[0-9]+]]
+; SI: v_cmp_lt_i32_e64 [[CMP_IF:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
+
+; SI: BB2_1:
+; SI: buffer_load_dword [[AVAL:v[0-9]+]]
+; SI: v_cmp_eq_i32_e64 [[CMP_ELSE:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
+
+; SI: v_cmp_ne_i32_e64 [[CMP_CMP:s\[[0-9]+:[0-9]+\]]], [[V_CMP]], 0
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
+; SI: buffer_store_dword [[RESULT]]
+define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+entry:
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp1 = icmp eq i32 %tid, 0
+ br i1 %tmp1, label %if, label %else
+
+if:
+ %gep.if = getelementptr i32 addrspace(1)* %a, i32 %tid
+ %a.val = load i32 addrspace(1)* %gep.if
+ %cmp.if = icmp eq i32 %a.val, 0
+ br label %endif
+
+else:
+ %gep.else = getelementptr i32 addrspace(1)* %b, i32 %tid
+ %b.val = load i32 addrspace(1)* %gep.else
+ %cmp.else = icmp slt i32 %b.val, 0
+ br label %endif
+
+endif:
+ %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else]
+ %ext = sext i1 %tmp4 to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index 9d8a623125fe..ad5e0a7765af 100644
--- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; Copy VGPR -> SGPR used twice as an instruction operand, which is then
; used in an REG_SEQUENCE that also needs to be handled.
-; SI-LABEL: @test_dup_operands:
-; SI: V_ADD_I32_e32
+; SI-LABEL: {{^}}test_dup_operands:
+; SI: v_add_i32_e32
define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
%a = load <2 x i32> addrspace(1)* %in
%lo = extractelement <2 x i32> %a, i32 0
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index c7d5bf90644e..db11af5cdabb 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
-; CHECK-LABEL: @phi1
-; CHECK: S_BUFFER_LOAD_DWORD [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0
-; CHECK: V_MOV_B32_e32 v{{[0-9]}}, [[DST]]
+; CHECK-LABEL: {{^}}phi1:
+; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
+; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
@@ -29,7 +29,7 @@ ENDIF: ; preds = %main_body, %ELSE
}
; Make sure this program doesn't crash
-; CHECK-LABEL: @phi2
+; CHECK-LABEL: {{^}}phi2:
define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -149,7 +149,7 @@ ENDIF24: ; preds = %ENDIF, %IF25
}
; We just want ot make sure the program doesn't crash
-; CHECK-LABEL: @loop
+; CHECK-LABEL: {{^}}loop:
define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
@@ -202,8 +202,8 @@ attributes #2 = { readonly }
attributes #3 = { readnone }
attributes #4 = { nounwind readonly }
-!0 = metadata !{metadata !"const", null}
-!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
+!0 = !{!"const", null}
+!1 = !{!0, !0, i64 0, i32 1}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
@@ -227,11 +227,11 @@ declare i32 @llvm.SI.packf16(float, float) #1
; registers were being identified as an SGPR regclass which was causing
; an assertion failure.
-; CHECK-LABEL: @sample_v3
-; CHECK: IMAGE_SAMPLE
-; CHECK: IMAGE_SAMPLE
-; CHECK: EXP
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}sample_v3:
+; CHECK: image_sample
+; CHECK: image_sample
+; CHECK: exp
+; CHECK: s_endpgm
define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
entry:
@@ -267,12 +267,12 @@ endif:
ret void
}
-!2 = metadata !{metadata !"const", null, i32 1}
+!2 = !{!"const", null, i32 1}
-; CHECK-LABEL: @copy1
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: V_ADD
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}copy1:
+; CHECK: buffer_load_dword
+; CHECK: v_add
+; CHECK: s_endpgm
define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
entry:
%0 = load float addrspace(1)* %in0
@@ -296,8 +296,8 @@ endif:
}
; This test is just checking that we don't crash / assertion fail.
-; CHECK-LABEL: @copy2
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}copy2:
+; CHECK: s_endpgm
define void @copy2([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
entry:
@@ -325,3 +325,54 @@ ENDIF69:
attributes #0 = { "ShaderType"="0" }
+; This test checks that image_sample resource descriptors aren't loaded into
+; vgprs. The verifier will fail if this happens.
+; CHECK-LABEL:{{^}}sample_rsrc:
+; CHECK: image_sample
+; CHECK: image_sample
+; CHECK: s_endpgm
+define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+bb:
+ %tmp = getelementptr [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
+ %tmp22 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
+ %tmp25 = getelementptr [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
+ %tmp26 = load <8 x i32> addrspace(2)* %tmp25, !tbaa !0
+ %tmp27 = getelementptr [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
+ %tmp28 = load <4 x i32> addrspace(2)* %tmp27, !tbaa !0
+ %tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
+ %tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
+ %tmp31 = bitcast float %tmp23 to i32
+ %tmp36 = icmp ne i32 %tmp31, 0
+ br i1 %tmp36, label %bb38, label %bb80
+
+bb38: ; preds = %bb
+ %tmp52 = bitcast float %tmp29 to i32
+ %tmp53 = bitcast float %tmp30 to i32
+ %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
+ %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
+ %tmp56 = bitcast <8 x i32> %tmp26 to <32 x i8>
+ %tmp57 = bitcast <4 x i32> %tmp28 to <16 x i8>
+ %tmp58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp55, <32 x i8> %tmp56, <16 x i8> %tmp57, i32 2)
+ br label %bb71
+
+bb80: ; preds = %bb
+ %tmp81 = bitcast float %tmp29 to i32
+ %tmp82 = bitcast float %tmp30 to i32
+ %tmp82.2 = add i32 %tmp82, 1
+ %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
+ %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
+ %tmp85 = bitcast <8 x i32> %tmp26 to <32 x i8>
+ %tmp86 = bitcast <4 x i32> %tmp28 to <16 x i8>
+ %tmp87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp84, <32 x i8> %tmp85, <16 x i8> %tmp86, i32 2)
+ br label %bb71
+
+bb71: ; preds = %bb80, %bb38
+ %tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
+ %tmp88 = extractelement <4 x float> %tmp72, i32 0
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
+ ret void
+}
+
+attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll
index 0484fc9a8563..f52a9baf4d18 100644
--- a/test/CodeGen/R600/shared-op-cycle.ll
+++ b/test/CodeGen/R600/shared-op-cycle.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: @main
+; CHECK: {{^}}main:
; CHECK: MULADD_IEEE *
; CHECK-NOT: MULADD_IEEE *
diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
index 43fab2a39dcc..98d9494e4e11 100644
--- a/test/CodeGen/R600/shl.ll
+++ b/test/CodeGen/R600/shl.ll
@@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-;EG-CHECK: @shl_v2i32
+;EG-CHECK: {{^}}shl_v2i32:
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @shl_v2i32
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: {{^}}shl_v2i32:
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +18,17 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
ret void
}
-;EG-CHECK: @shl_v4i32
+;EG-CHECK: {{^}}shl_v4i32:
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @shl_v4i32
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: {{^}}shl_v4i32:
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,7 +39,7 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
ret void
}
-;EG-CHECK: @shl_i64
+;EG-CHECK: {{^}}shl_i64:
;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG-CHECK: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
;EG-CHECK: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
@@ -51,8 +51,8 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
-;SI-CHECK: @shl_i64
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: {{^}}shl_i64:
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
@@ -63,7 +63,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
ret void
}
-;EG-CHECK: @shl_v2i64
+;EG-CHECK: {{^}}shl_v2i64:
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]]
@@ -85,9 +85,9 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
;EG-CHECK-DAG: CNDE_INT
;EG-CHECK-DAG: CNDE_INT
-;SI-CHECK: @shl_v2i64
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: {{^}}shl_v2i64:
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
@@ -98,7 +98,7 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
ret void
}
-;EG-CHECK: @shl_v4i64
+;EG-CHECK: {{^}}shl_v4i64:
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
@@ -140,11 +140,11 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
;EG-CHECK-DAG: CNDE_INT
;EG-CHECK-DAG: CNDE_INT
-;SI-CHECK: @shl_v4i64
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: {{^}}shl_v4i64:
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
diff --git a/test/CodeGen/R600/shl_add_constant.ll b/test/CodeGen/R600/shl_add_constant.ll
new file mode 100644
index 000000000000..6915495beece
--- /dev/null
+++ b/test/CodeGen/R600/shl_add_constant.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Test with inline immediate
+
+; FUNC-LABEL: {{^}}shl_2_add_9_i32:
+; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32 addrspace(1)* %ptr, align 4
+ %add = add i32 %val, 9
+ %result = shl i32 %add, 2
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
+; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-DAG: buffer_store_dword [[ADDREG]]
+; SI-DAG: buffer_store_dword [[SHLREG]]
+; SI: s_endpgm
+define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32 addrspace(1)* %ptr, align 4
+ %add = add i32 %val, 9
+ %result = shl i32 %add, 2
+ store i32 %result, i32 addrspace(1)* %out0, align 4
+ store i32 %add, i32 addrspace(1)* %out1, align 4
+ ret void
+}
+
+; Test with add literal constant
+
+; FUNC-LABEL: {{^}}shl_2_add_999_i32:
+; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32 addrspace(1)* %ptr, align 4
+ %shl = add i32 %val, 999
+ %result = shl i32 %shl, 2
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_add_shl_add_constant:
+; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: buffer_store_dword [[VRESULT]]
+define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+ %add.0 = add i32 %x, 123
+ %shl = shl i32 %add.0, 3
+ %add.1 = add i32 %shl, %y
+ store i32 %add.1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
+; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: buffer_store_dword [[VRESULT]]
+
+define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+ %add.0 = add i32 %x, 123
+ %shl = shl i32 %add.0, 3
+ %add.1 = add i32 %y, %shl
+ store i32 %add.1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/shl_add_ptr.ll b/test/CodeGen/R600/shl_add_ptr.ll
new file mode 100644
index 000000000000..cef48e227ef4
--- /dev/null
+++ b/test/CodeGen/R600/shl_add_ptr.ll
@@ -0,0 +1,283 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
+
+; Test that doing a shift of a pointer with a constant add will be
+; folded into the constant offset addressing mode even if the add has
+; multiple uses. This is relevant to accessing 2 separate, adjacent
+; LDS globals.
+
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+@lds0 = addrspace(3) global [512 x float] undef, align 4
+@lds1 = addrspace(3) global [512 x float] undef, align 4
+
+
+; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
+
+; SI-LABEL: {{^}}load_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
+; SI: s_endpgm
+define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ store float %val0, float addrspace(1)* %out
+ ret void
+}
+
+; Make sure once the first use is folded into the addressing mode, the
+; remaining add use goes through the normal shl + add constant fold.
+
+; SI-LABEL: {{^}}load_shl_base_lds_1:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
+; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
+; SI-DAG: buffer_store_dword [[RESULT]]
+; SI-DAG: buffer_store_dword [[ADDUSE]]
+; SI: s_endpgm
+define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %shl_add_use = shl i32 %idx.0, 2
+ store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
+ store float %val0, float addrspace(1)* %out
+ ret void
+}
+
+@maxlds = addrspace(3) global [65536 x i8] undef, align 4
+
+; SI-LABEL: {{^}}load_shl_base_lds_max_offset
+; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
+; SI: s_endpgm
+define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 65535
+ %arrayidx0 = getelementptr inbounds [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
+ %val0 = load i8 addrspace(3)* %arrayidx0
+ store i32 %idx.0, i32 addrspace(1)* %add_use
+ store i8 %val0, i8 addrspace(1)* %out
+ ret void
+}
+
+; The two globals are placed adjacent in memory, so the same base
+; pointer can be used with an offset into the second one.
+
+; SI-LABEL: {{^}}load_shl_base_lds_2:
+; SI: s_mov_b32 m0, -1
+; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
+; SI: s_endpgm
+define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 64
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
+ %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %sum = fadd float %val0, %val1
+ store float %sum, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}store_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
+; SI: s_endpgm
+define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ store float 1.0, float addrspace(3)* %arrayidx0, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+
+; --------------------------------------------------------------------------------
+; Atomics.
+
+@lds2 = addrspace(3) global [512 x i32] undef, align 4
+
+; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+; %idx.0 = add nsw i32 %tid.x, 2
+; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+; %val = load atomic i32 addrspace(3)* %arrayidx0 seq_cst, align 4
+; store i32 %val, i32 addrspace(1)* %out, align 4
+; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+; ret void
+; }
+
+
+; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
+ %result = extractvalue { i32, i1 } %pair, 0
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+; %idx.0 = add nsw i32 %tid.x, 2
+; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+; store i32 %val, i32 addrspace(1)* %out, align 4
+; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+; ret void
+; }
+
+; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+ %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx.0 = add nsw i32 %tid.x, 2
+ %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll
index daa4667150bc..515064f3076b 100644
--- a/test/CodeGen/R600/si-annotate-cf-assertion.ll
+++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll
@@ -1,10 +1,10 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s
define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
-; CHECK-LABEL: @test:
+; CHECK-LABEL: {{^}}test:
entry:
switch i32 %x, label %sw.default [
diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/R600/si-lod-bias.ll
index 8d7a79cdbda0..2e2f2ce5fec8 100644
--- a/test/CodeGen/R600/si-lod-bias.ll
+++ b/test/CodeGen/R600/si-lod-bias.ll
@@ -1,10 +1,10 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
; This shader has the potential to generated illegal VGPR to SGPR copies if
; the wrong register class is used for the REG_SEQUENCE instructions.
-; CHECK: @main
-; CHECK: IMAGE_SAMPLE_B v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
+; CHECK: {{^}}main:
+; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
@@ -47,5 +47,5 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
-!0 = metadata !{metadata !"const", null}
-!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
+!0 = !{!"const", null}
+!1 = !{!0, !0, i64 0, i32 1}
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
index 53a096513bbc..f1c20fe89773 100644
--- a/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck %s
; These tests check that the compiler won't crash when it needs to spill
; SGPRs.
-; CHECK-LABEL: @main
+; CHECK-LABEL: {{^}}main:
; Writing to M0 from an SMRD instruction will hang the GPU.
-; CHECK-NOT: S_BUFFER_LOAD_DWORD m0
-; CHECK: S_ENDPGM
+; CHECK-NOT: s_buffer_load_dword m0
+; CHECK: s_endpgm
@ddxy_lds = external addrspace(3) global [64 x i32]
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
@@ -686,10 +686,10 @@ attributes #2 = { readnone }
attributes #3 = { readonly }
attributes #4 = { nounwind readonly }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
-; CHECK-LABEL: @main1
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}main1:
+; CHECK: s_endpgm
define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
new file mode 100644
index 000000000000..b2f4a9ff05e1
--- /dev/null
+++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -0,0 +1,238 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.AMDGPU.barrier.local() #2
+
+
+@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
+@stored_constant_ptr = addrspace(3) global i32 addrspace(2)* undef, align 8
+@stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8
+
+; FUNC-LABEL: @reorder_local_load_global_store_local_load
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
+; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; CI: buffer_store_dword
+define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+ %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+ %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+ %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ store i32 99, i32 addrspace(1)* %gptr, align 4
+ %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+ %add = add nsw i32 %tmp1, %tmp2
+
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @no_reorder_local_load_volatile_global_store_local_load
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
+; CI: buffer_store_dword
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+ %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+ %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+ %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ store volatile i32 99, i32 addrspace(1)* %gptr, align 4
+ %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+ %add = add nsw i32 %tmp1, %tmp2
+
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
+; CI: buffer_store_dword
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+ %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+ %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+ %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ store i32 99, i32 addrspace(1)* %gptr, align 4
+ call void @llvm.AMDGPU.barrier.local() #2
+ %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+ %add = add nsw i32 %tmp1, %tmp2
+
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Technically we could reorder these, but just comparing the
+; instruction type of the load is insufficient.
+
+; FUNC-LABEL: @no_reorder_constant_load_global_store_constant_load
+; CI: buffer_load_dword
+; CI: buffer_store_dword
+; CI: buffer_load_dword
+; CI: buffer_store_dword
+define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+ %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+
+ %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+ %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ store i32 99, i32 addrspace(1)* %gptr, align 4
+ %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+ %add = add nsw i32 %tmp1, %tmp2
+
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; XXX: Should be able to reorder this, but the laods count as ordered
+
+; FUNC-LABEL: @reorder_constant_load_local_store_constant_load
+; CI: buffer_load_dword
+; CI: ds_write_b32
+; CI: buffer_load_dword
+; CI: buffer_store_dword
+define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
+ %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+
+ %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+ %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ store i32 99, i32 addrspace(3)* %lptr, align 4
+ %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+ %add = add nsw i32 %tmp1, %tmp2
+
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @reorder_smrd_load_local_store_smrd_load
+; CI: s_load_dword
+; CI: s_load_dword
+; CI: s_load_dword
+; CI: ds_write_b32
+; CI: buffer_store_dword
+define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
+ %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+ %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ store i32 99, i32 addrspace(3)* %lptr, align 4
+ %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+ %add = add nsw i32 %tmp1, %tmp2
+
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @reorder_global_load_local_store_global_load
+; CI: buffer_load_dword
+; CI: buffer_load_dword
+; CI: ds_write_b32
+; CI: buffer_store_dword
+define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 {
+ %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 2
+
+ %tmp1 = load i32 addrspace(1)* %ptr1, align 4
+ store i32 99, i32 addrspace(3)* %lptr, align 4
+ %tmp2 = load i32 addrspace(1)* %ptr2, align 4
+
+ %add = add nsw i32 %tmp1, %tmp2
+
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @reorder_local_offsets
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: buffer_store_dword
+; CI: s_endpgm
+define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
+ %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 3
+ %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 100
+ %ptr3 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 101
+
+ store i32 123, i32 addrspace(3)* %ptr1, align 4
+ %tmp1 = load i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32 addrspace(3)* %ptr3, align 4
+ store i32 123, i32 addrspace(3)* %ptr2, align 4
+ %tmp3 = load i32 addrspace(3)* %ptr1, align 4
+ store i32 789, i32 addrspace(3)* %ptr3, align 4
+
+ %add.0 = add nsw i32 %tmp2, %tmp1
+ %add.1 = add nsw i32 %add.0, %tmp3
+ store i32 %add.1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @reorder_global_offsets
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword
+; CI: s_endpgm
+define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
+ %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 3
+ %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 100
+ %ptr3 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 101
+
+ store i32 123, i32 addrspace(1)* %ptr1, align 4
+ %tmp1 = load i32 addrspace(1)* %ptr2, align 4
+ %tmp2 = load i32 addrspace(1)* %ptr3, align 4
+ store i32 123, i32 addrspace(1)* %ptr2, align 4
+ %tmp3 = load i32 addrspace(1)* %ptr1, align 4
+ store i32 789, i32 addrspace(1)* %ptr3, align 4
+
+ %add.0 = add nsw i32 %tmp2, %tmp1
+ %add.1 = add nsw i32 %add.0, %tmp3
+ store i32 %add.1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; XFUNC-LABEL: @reorder_local_load_tbuffer_store_local_load
+; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x4
+; XCI: TBUFFER_STORE_FORMAT
+; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
+; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
+; %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+; %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+; %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+; %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+
+; %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
+; call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+; i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
+; i32 1, i32 0)
+
+; %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+; %add = add nsw i32 %tmp1, %tmp2
+
+; store i32 %add, i32 addrspace(1)* %out, align 4
+; ret void
+; }
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind noduplicate }
diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll
index 093234f71958..8cbb4914c857 100644
--- a/test/CodeGen/R600/si-vector-hang.ll
+++ b/test/CodeGen/R600/si-vector-hang.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-; CHECK: @test_8_min_char
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
+; CHECK: {{^}}test_8_min_char:
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
; ModuleID = 'radeon'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
target triple = "r600--"
@@ -96,12 +96,12 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
-!0 = metadata !{null}
-!1 = metadata !{null}
-!2 = metadata !{null}
-!3 = metadata !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
-!4 = metadata !{null}
-!5 = metadata !{null}
-!6 = metadata !{null}
-!7 = metadata !{null}
-!8 = metadata !{null}
+!0 = !{null}
+!1 = !{null}
+!2 = !{null}
+!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
+!4 = !{null}
+!5 = !{null}
+!6 = !{null}
+!7 = !{null}
+!8 = !{null}
diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll
index e3bee507de67..1d90fdbec853 100644
--- a/test/CodeGen/R600/sign_extend.ll
+++ b/test/CodeGen/R600/sign_extend.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @s_sext_i1_to_i32:
-; SI: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i1_to_i32:
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i32
@@ -10,10 +10,10 @@ define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
ret void
}
-; SI-LABEL: @test:
-; SI: V_ASHR
-; SI: S_ENDPG
-define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+; SI-LABEL: {{^}}test_s_sext_i32_to_i64:
+; SI: s_ashr_i32
+; SI: s_endpg
+define void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
entry:
%mul = mul i32 %a, %b
%add = add i32 %mul, %c
@@ -22,10 +22,10 @@ entry:
ret void
}
-; SI-LABEL: @s_sext_i1_to_i64:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i1_to_i64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%sext = sext i1 %cmp to i64
@@ -33,18 +33,18 @@ define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
ret void
}
-; SI-LABEL: @s_sext_i32_to_i64:
-; SI: S_ASHR_I32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i32_to_i64:
+; SI: s_ashr_i32
+; SI: s_endpgm
define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
%sext = sext i32 %a to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @v_sext_i32_to_i64:
-; SI: V_ASHR
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}v_sext_i32_to_i64:
+; SI: v_ashr
+; SI: s_endpgm
define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%val = load i32 addrspace(1)* %in, align 4
%sext = sext i32 %val to i64
@@ -52,8 +52,8 @@ define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) no
ret void
}
-; SI-LABEL: @s_sext_i16_to_i64:
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i16_to_i64:
+; SI: s_endpgm
define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
%sext = sext i16 %a to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
index e6f8ce8ef0ee..e02350cc23f7 100644
--- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
+++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -1,5 +1,5 @@
; XFAIL: *
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s
; 64-bit select was originally lowered with a build_pair, and this
; could be simplified to 1 cndmask instead of 2, but that broken when
@@ -14,10 +14,10 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
}
; FIXME: Fix truncating store for local memory
-; SI-LABEL: @trunc_load_alloca_i64:
-; SI: V_MOVRELS_B32
-; SI-NOT: V_MOVRELS_B32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}trunc_load_alloca_i64:
+; SI: v_movrels_b32
+; SI-NOT: v_movrels_b32
+; SI: s_endpgm
define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
%idx = add i32 %a, %b
%alloca = alloca i64, i32 4
diff --git a/test/CodeGen/R600/sint_to_fp.f64.ll b/test/CodeGen/R600/sint_to_fp.f64.ll
new file mode 100644
index 000000000000..77844a6aa384
--- /dev/null
+++ b/test/CodeGen/R600/sint_to_fp.f64.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
+; SI: v_cvt_f64_i32_e32
+define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
+ %result = sitofp i32 %in to double
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}sint_to_fp_i1_f64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
+; uses an SGPR for [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+ %cmp = icmp eq i32 %in, 0
+ %fp = sitofp i1 %cmp to double
+ store double %fp, double addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}sint_to_fp_i1_f64_load:
+; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, -1
+; SI-NEXT: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+ %fp = sitofp i1 %in to double
+ store double %fp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: @s_sint_to_fp_i64_to_f64
+define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
+ %result = sitofp i64 %in to double
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: @v_sint_to_fp_i64_to_f64
+; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
+ %val = load i64 addrspace(1)* %gep, align 8
+ %result = sitofp i64 %val to double
+ store double %result, double addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
index b27dfda8aea6..dd541bb36a24 100644
--- a/test/CodeGen/R600/sint_to_fp.ll
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -1,28 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-
-; R600-CHECK: @sint_to_fp_v2i32
-; R600-CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI-CHECK: @sint_to_fp_v2i32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}s_sint_to_fp_i32_to_f32:
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
+define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
+ %result = sitofp i32 %in to float
+ store float %result, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sint_to_fp_v2i32:
+; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
%result = sitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
}
-; R600-CHECK: @sint_to_fp_v4i32
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: @sint_to_fp_v4i32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
+; FUNC-LABEL: {{^}}sint_to_fp_v4i32:
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%value = load <4 x i32> addrspace(1) * %in
%result = sitofp <4 x i32> %value to <4 x float>
@@ -30,11 +40,11 @@ define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac
ret void
}
-; FUNC-LABEL: @sint_to_fp_i1_f32:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00, [[CMP]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
@@ -42,10 +52,10 @@ define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
ret void
}
-; FUNC-LABEL: @sint_to_fp_i1_f32_load:
-; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sint_to_fp_i1_f32_load:
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
%fp = sitofp i1 %in to float
store float %fp, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/sint_to_fp64.ll b/test/CodeGen/R600/sint_to_fp64.ll
deleted file mode 100644
index 12b8cf57cf5f..000000000000
--- a/test/CodeGen/R600/sint_to_fp64.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI: @sint_to_fp64
-; SI: V_CVT_F64_I32_e32
-define void @sint_to_fp64(double addrspace(1)* %out, i32 %in) {
- %result = sitofp i32 %in to double
- store double %result, double addrspace(1)* %out
- ret void
-}
-
-; SI-LABEL: @sint_to_fp_i1_f64:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
-; we should be able to fold the SGPRs into the V_CNDMASK instructions.
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
-define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
- %cmp = icmp eq i32 %in, 0
- %fp = sitofp i1 %cmp to double
- store double %fp, double addrspace(1)* %out, align 4
- ret void
-}
-
-; SI-LABEL: @sint_to_fp_i1_f64_load:
-; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, -1
-; SI-NEXT: V_CVT_F64_I32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
-define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
- %fp = sitofp i1 %in to double
- store double %fp, double addrspace(1)* %out, align 8
- ret void
-}
diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll
index dec61855b018..a66ad0201bf9 100644
--- a/test/CodeGen/R600/smrd.ll
+++ b/test/CodeGen/R600/smrd.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
; SMRD load with an immediate offset.
-; CHECK-LABEL: @smrd0
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; CHECK-LABEL: {{^}}smrd0:
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 1
@@ -12,8 +12,8 @@ entry:
}
; SMRD load with the largest possible immediate offset.
-; CHECK-LABEL: @smrd1
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; CHECK-LABEL: {{^}}smrd1:
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 255
@@ -23,9 +23,10 @@ entry:
}
; SMRD load with an offset greater than the largest possible immediate.
-; CHECK-LABEL: @smrd2
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK-LABEL: {{^}}smrd2:
+; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK: s_endpgm
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 256
@@ -35,15 +36,15 @@ entry:
}
; SMRD load with a 64-bit offset
-; CHECK-LABEL: @smrd3
-; CHECK-DAG: S_MOV_B32 s[[SHI:[0-9]+]], 4
-; CHECK-DAG: S_MOV_B32 s[[SLO:[0-9]+]], 0
+; CHECK-LABEL: {{^}}smrd3:
+; CHECK-DAG: s_mov_b32 s[[SHI:[0-9]+]], 4
+; CHECK-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
; FIXME: We don't need to copy these values to VGPRs
-; CHECK-DAG: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; CHECK-DAG: V_MOV_B32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; FIXME: We should be able to use S_LOAD_DWORD here
-; BUFFER_LOAD_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v[[[VLO]]:[[VHI]]] + 0x0
-
+; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; FIXME: We should be able to use s_load_dword here
+; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
+; CHECK: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
@@ -53,8 +54,8 @@ entry:
}
; SMRD load using the load.const intrinsic with an immediate offset
-; CHECK-LABEL: @smrd_load_const0
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; CHECK-LABEL: {{^}}smrd_load_const0:
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -64,10 +65,10 @@ main_body:
ret void
}
-; SMRD load using the load.const intrinsic with an offset greater largest possible
-; immediate offset.
-; CHECK-LABEL: @smrd_load_const1
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SMRD load using the load.const intrinsic with the largest possible immediate
+; offset.
+; CHECK-LABEL: {{^}}smrd_load_const1:
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -76,10 +77,12 @@ main_body:
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
ret void
}
-; SMRD load using the load.const intrinsic with the largetst possible
+; SMRD load using the load.const intrinsic with an offset greater than the
+; largets possible immediate.
; immediate offset.
-; CHECK-LABEL: @smrd_load_const2
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK-LABEL: {{^}}smrd_load_const2:
+; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
diff --git a/test/CodeGen/R600/split-scalar-i64-add.ll b/test/CodeGen/R600/split-scalar-i64-add.ll
new file mode 100644
index 000000000000..ec50fd9f4c1e
--- /dev/null
+++ b/test/CodeGen/R600/split-scalar-i64-add.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() readnone
+
+; This is broken because the low half of the 64-bit add remains on the
+; SALU, but the upper half does not. The addc expects the carry bit
+; set in vcc, which is undefined since the low scalar half add sets
+; scc instead.
+
+; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
+; SI: v_add_i32
+; SI: v_addc_u32
+define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
+ %vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
+ %vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
+ %bc = bitcast <2 x i32> %vec.1 to i64
+ %add = add i64 %bc, 399
+ store i64 %add, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
+; SI: v_add_i32
+; SI: v_addc_u32
+define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
+ %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
+ %vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
+ %bc = bitcast <2 x i32> %vec.1 to i64
+ %add = add i64 %bc, %val1
+ store i64 %add, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; Doesn't use constants
+; FUNC-LABEL @imp_def_vcc_split_i64_add_2
+; SI: v_add_i32
+; SI: v_addc_u32
+define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
+ %tid = call i32 @llvm.r600.read.tidig.x() readnone
+ %gep = getelementptr i32 addrspace(1)* %in, i32 %tid
+ %load = load i32 addrspace(1)* %gep
+ %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
+ %vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
+ %bc = bitcast <2 x i32> %vec.1 to i64
+ %add = add i64 %bc, %val1
+ store i64 %add, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index 9eb3dc544074..d463fc20b9c5 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-;EG-CHECK-LABEL: @ashr_v2i32
+;EG-CHECK-LABEL: {{^}}ashr_v2i32:
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK-LABEL: @ashr_v2i32
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v2i32:
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +18,17 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
ret void
}
-;EG-CHECK-LABEL: @ashr_v4i32
+;EG-CHECK-LABEL: {{^}}ashr_v4i32:
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK-LABEL: @ashr_v4i32
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v4i32:
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,11 +39,11 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
ret void
}
-;EG-CHECK-LABEL: @ashr_i64
+;EG-CHECK-LABEL: {{^}}ashr_i64:
;EG-CHECK: ASHR
-;SI-CHECK-LABEL: @ashr_i64
-;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
+;SI-CHECK-LABEL: {{^}}ashr_i64:
+;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry:
%0 = sext i32 %in to i64
@@ -52,7 +52,7 @@ entry:
ret void
}
-;EG-CHECK-LABEL: @ashr_i64_2
+;EG-CHECK-LABEL: {{^}}ashr_i64_2:
;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
@@ -66,8 +66,8 @@ entry:
;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-;SI-CHECK-LABEL: @ashr_i64_2
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_i64_2:
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
@@ -78,7 +78,7 @@ entry:
ret void
}
-;EG-CHECK-LABEL: @ashr_v2i64
+;EG-CHECK-LABEL: {{^}}ashr_v2i64:
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
@@ -104,9 +104,9 @@ entry:
;EG-CHECK-DAG: CNDE_INT
;EG-CHECK-DAG: CNDE_INT
-;SI-CHECK-LABEL: @ashr_v2i64
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v2i64:
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
@@ -117,7 +117,7 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
ret void
}
-;EG-CHECK-LABEL: @ashr_v4i64
+;EG-CHECK-LABEL: {{^}}ashr_v4i64:
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
@@ -167,11 +167,11 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
;EG-CHECK-DAG: CNDE_INT
;EG-CHECK-DAG: CNDE_INT
-;SI-CHECK-LABEL: @ashr_v4i64
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v4i64:
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll
index 65e33952d29e..759c8b4ef722 100644
--- a/test/CodeGen/R600/srem.ll
+++ b/test/CodeGen/R600/srem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s
; RUN: llc -march=r600 -mcpu=redwood < %s
define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll
index 44ad73f073ed..9e7b35e8338a 100644
--- a/test/CodeGen/R600/srl.ll
+++ b/test/CodeGen/R600/srl.ll
@@ -1,166 +1,166 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-
-;EG-CHECK: @lshr_v2i32
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}lshr_i32:
+; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %result = lshr i32 %a, %b
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
-;SI-CHECK: @lshr_v2i32
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; FUNC-LABEL: {{^}}lshr_v2i32:
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %a = load <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32> addrspace(1)* %b_ptr
%result = lshr <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
+; FUNC-LABEL: {{^}}lshr_v4i32:
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;EG-CHECK: @lshr_v4i32
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-;SI-CHECK: @lshr_v4i32
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %a = load <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32> addrspace(1)* %b_ptr
%result = lshr <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
-;EG-CHECK: @lshr_i64
-;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
-;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-CHECK-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-;EG-CHECK-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
-
-;SI-CHECK: @lshr_i64
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
+; FUNC-LABEL: {{^}}lshr_i64:
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
+; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
+; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
+; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
+; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1) * %in
- %b = load i64 addrspace(1) * %b_ptr
+ %a = load i64 addrspace(1)* %in
+ %b = load i64 addrspace(1)* %b_ptr
%result = lshr i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
}
-;EG-CHECK: @lshr_v2i64
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-
-;SI-CHECK: @lshr_v2i64
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
+; FUNC-LABEL: {{^}}lshr_v2i64:
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1) * %in
- %b = load <2 x i64> addrspace(1) * %b_ptr
+ %a = load <2 x i64> addrspace(1)* %in
+ %b = load <2 x i64> addrspace(1)* %b_ptr
%result = lshr <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
}
-
-;EG-CHECK: @lshr_v4i64
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]]
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]]
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-
-;SI-CHECK: @lshr_v4i64
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
+; FUNC-LABEL: {{^}}lshr_v4i64:
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHC]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHD]]
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: LSHR {{.*}}, [[SHC]]
+; EG-DAG: LSHR {{.*}}, [[SHD]]
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: LSHR {{.*}}, [[SHC]]
+; EG-DAG: LSHR {{.*}}, [[SHD]]
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1) * %in
- %b = load <4 x i64> addrspace(1) * %b_ptr
+ %a = load <4 x i64> addrspace(1)* %in
+ %b = load <4 x i64> addrspace(1)* %b_ptr
%result = lshr <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll
index b330276ae9e7..ccf76ca29c75 100644
--- a/test/CodeGen/R600/ssubo.ll
+++ b/test/CodeGen/R600/ssubo.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
-; FUNC-LABEL: @ssubo_i64_zext
+; FUNC-LABEL: {{^}}ssubo_i64_zext:
define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
@@ -15,7 +15,7 @@ define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; FUNC-LABEL: @s_ssubo_i32
+; FUNC-LABEL: {{^}}s_ssubo_i32:
define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %ssub, 0
@@ -25,7 +25,7 @@ define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @v_ssubo_i32
+; FUNC-LABEL: {{^}}v_ssubo_i32:
define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
@@ -37,9 +37,9 @@ define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @s_ssubo_i64
-; SI: S_SUB_I32
-; SI: S_SUBB_U32
+; FUNC-LABEL: {{^}}s_ssubo_i64:
+; SI: s_sub_u32
+; SI: s_subb_u32
define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
@@ -49,9 +49,9 @@ define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
ret void
}
-; FUNC-LABEL: @v_ssubo_i64
-; SI: V_SUB_I32_e32
-; SI: V_SUBB_U32_e32
+; FUNC-LABEL: {{^}}v_ssubo_i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/store-barrier.ll b/test/CodeGen/R600/store-barrier.ll
new file mode 100644
index 000000000000..350b006ba5e0
--- /dev/null
+++ b/test/CodeGen/R600/store-barrier.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
+
+; This test is for a bug in the machine scheduler where stores without
+; an underlying object would be moved across the barrier. In this
+; test, the <2 x i8> store will be split into two i8 stores, so they
+; won't have an underlying object.
+
+; CHECK-LABEL: {{^}}test:
+; CHECK: ds_write_b8
+; CHECK: ds_write_b8
+; CHECK: s_barrier
+; CHECK: s_endpgm
+; Function Attrs: nounwind
+define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
+bb:
+ %tmp10 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp9
+ %tmp13 = load i32 addrspace(1)* %tmp10, align 2
+ %tmp14 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp13
+ %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
+ %tmp16 = add i32 %tmp13, 1
+ %tmp17 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp16
+ store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
+ tail call void @llvm.AMDGPU.barrier.local() #2
+ %tmp25 = load i32 addrspace(1)* %tmp10, align 4
+ %tmp26 = sext i32 %tmp25 to i64
+ %tmp27 = sext i32 %arg4 to i64
+ %tmp28 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
+ %tmp29 = load i8 addrspace(3)* %tmp28, align 1
+ %tmp30 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
+ store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
+ %tmp32 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
+ %tmp33 = load i8 addrspace(3)* %tmp32, align 1
+ %tmp35 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
+ store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
+ ret void
+}
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/store-v3i32.ll b/test/CodeGen/R600/store-v3i32.ll
index 33578035daa2..42132f6ddba0 100644
--- a/test/CodeGen/R600/store-v3i32.ll
+++ b/test/CodeGen/R600/store-v3i32.ll
@@ -1,11 +1,11 @@
; XFAIL: *
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
; 3 vectors have the same size and alignment as 4 vectors, so this
; should be done in a single store.
-; SI-LABEL: @store_v3i32:
-; SI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}store_v3i32:
+; SI: buffer_store_dwordx4
define void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll
index 58d28b567bd1..82d427e4fa76 100644
--- a/test/CodeGen/R600/store-v3i64.ll
+++ b/test/CodeGen/R600/store-v3i64.ll
@@ -1,27 +1,27 @@
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
-; SI-LABEL: @global_store_v3i64:
-; SI: BUFFER_STORE_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}global_store_v3i64:
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
define void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
ret void
}
-; SI-LABEL: @global_store_v3i64_unaligned:
+; SI-LABEL: {{^}}global_store_v3i64_unaligned:
define void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
ret void
}
-; SI-LABEL: @local_store_v3i64:
+; SI-LABEL: {{^}}local_store_v3i64:
define void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
ret void
}
-; SI-LABEL: @local_store_v3i64_unaligned:
+; SI-LABEL: {{^}}local_store_v3i64_unaligned:
define void @local_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
ret void
diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll
index 41c5edc280d7..868b68d10efa 100644
--- a/test/CodeGen/R600/store-vector-ptrs.ll
+++ b/test/CodeGen/R600/store-vector-ptrs.ll
@@ -1,9 +1,11 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s
+; This tests for a bug that caused a crash in
+; AMDGPUDAGToDAGISel::SelectMUBUFScratch() which is used for selecting
+; scratch loads and stores.
+; CHECK-LABEL: {{^}}store_vector_ptrs:
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
%p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
store <4 x i32*> %p, <4 x i32*>* %out
ret void
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index dd275338d7c0..8e5cb2a4e6d6 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI-CHECK -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG-CHECK -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM-CHECK -check-prefix=FUNC %s
;===------------------------------------------------------------------------===;
; Global Address Space
;===------------------------------------------------------------------------===;
-; FUNC-LABEL: @store_i1
+; FUNC-LABEL: {{^}}store_i1:
; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
define void @store_i1(i1 addrspace(1)* %out) {
entry:
store i1 true, i1 addrspace(1)* %out
@@ -15,18 +15,20 @@ entry:
}
; i8 store
-; EG-CHECK-LABEL: @store_i8
+; EG-CHECK-LABEL: {{^}}store_i8:
; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
-; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
+
; IG 0: Get the byte index and truncate the value
-; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
+; EG-CHECK: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43)
+
+
; IG 1: Truncate the calculated the shift amount for the mask
-; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG-CHECK-NEXT: 3
+
; IG 2: Shift the value and the mask
-; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
+; EG-CHECK: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
; EG-CHECK-NEXT: 255
; IG 3: Initialize the Y and Z channels to zero
@@ -34,8 +36,8 @@ entry:
; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
-; SI-CHECK-LABEL: @store_i8
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_i8:
+; SI-CHECK: buffer_store_byte
define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
entry:
@@ -44,18 +46,23 @@ entry:
}
; i16 store
-; EG-CHECK-LABEL: @store_i16
+; EG-CHECK-LABEL: {{^}}store_i16:
; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
-; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
+
; IG 0: Get the byte index and truncate the value
-; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
+
+
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
+; EG-CHECK-NEXT: 3(4.203895e-45),
+
+; EG-CHECK: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
+
; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; IG 1: Truncate the calculated the shift amount for the mask
-; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG-CHECK: 3
+
; IG 2: Shift the value and the mask
-; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
+; EG-CHECK: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
; EG-CHECK-NEXT: 65535
; IG 3: Initialize the Y and Z channels to zero
@@ -63,20 +70,20 @@ entry:
; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
-; SI-CHECK-LABEL: @store_i16
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK-LABEL: {{^}}store_i16:
+; SI-CHECK: buffer_store_short
define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
store i16 %in, i16 addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @store_v2i8
+; EG-CHECK-LABEL: {{^}}store_v2i8:
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK-LABEL: @store_v2i8
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_v2i8:
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i8>
@@ -85,13 +92,13 @@ entry:
}
-; EG-CHECK-LABEL: @store_v2i16
+; EG-CHECK-LABEL: {{^}}store_v2i16:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v2i16
+; CM-CHECK-LABEL: {{^}}store_v2i16:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v2i16
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK-LABEL: {{^}}store_v2i16:
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
%0 = trunc <2 x i32> %in to <2 x i16>
@@ -99,15 +106,15 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @store_v4i8
+; EG-CHECK-LABEL: {{^}}store_v4i8:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v4i8
+; CM-CHECK-LABEL: {{^}}store_v4i8:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v4i8
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_v4i8:
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i8>
@@ -116,30 +123,30 @@ entry:
}
; floating-point store
-; EG-CHECK-LABEL: @store_f32
+; EG-CHECK-LABEL: {{^}}store_f32:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-; CM-CHECK-LABEL: @store_f32
+; CM-CHECK-LABEL: {{^}}store_f32:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK-LABEL: @store_f32
-; SI-CHECK: BUFFER_STORE_DWORD
+; SI-CHECK-LABEL: {{^}}store_f32:
+; SI-CHECK: buffer_store_dword
define void @store_f32(float addrspace(1)* %out, float %in) {
store float %in, float addrspace(1)* %out
ret void
}
-; EG-CHECK-LABEL: @store_v4i16
+; EG-CHECK-LABEL: {{^}}store_v4i16:
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK-LABEL: @store_v4i16
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK-NOT: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_v4i16:
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK-NOT: buffer_store_byte
define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
entry:
%0 = trunc <4 x i32> %in to <4 x i16>
@@ -148,12 +155,12 @@ entry:
}
; vec2 floating-point stores
-; EG-CHECK-LABEL: @store_v2f32
+; EG-CHECK-LABEL: {{^}}store_v2f32:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v2f32
+; CM-CHECK-LABEL: {{^}}store_v2f32:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v2f32
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK-LABEL: {{^}}store_v2f32:
+; SI-CHECK: buffer_store_dwordx2
define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
entry:
@@ -163,23 +170,23 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @store_v4i32
+; EG-CHECK-LABEL: {{^}}store_v4i32:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v4i32
+; CM-CHECK-LABEL: {{^}}store_v4i32:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v4i32
-; SI-CHECK: BUFFER_STORE_DWORDX4
+; SI-CHECK-LABEL: {{^}}store_v4i32:
+; SI-CHECK: buffer_store_dwordx4
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @store_i64_i8
+; FUNC-LABEL: {{^}}store_i64_i8:
; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
@@ -187,9 +194,9 @@ entry:
ret void
}
-; FUNC-LABEL: @store_i64_i16
+; FUNC-LABEL: {{^}}store_i64_i16:
; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: buffer_store_short
define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
@@ -201,99 +208,99 @@ entry:
; Local Address Space
;===------------------------------------------------------------------------===;
-; FUNC-LABEL: @store_local_i1
+; FUNC-LABEL: {{^}}store_local_i1:
; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
define void @store_local_i1(i1 addrspace(3)* %out) {
entry:
store i1 true, i1 addrspace(3)* %out
ret void
}
-; EG-CHECK-LABEL: @store_local_i8
+; EG-CHECK-LABEL: {{^}}store_local_i8:
; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK-LABEL: @store_local_i8
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK-LABEL: {{^}}store_local_i8:
+; SI-CHECK: ds_write_b8
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
store i8 %in, i8 addrspace(3)* %out
ret void
}
-; EG-CHECK-LABEL: @store_local_i16
+; EG-CHECK-LABEL: {{^}}store_local_i16:
; EG-CHECK: LDS_SHORT_WRITE
-; SI-CHECK-LABEL: @store_local_i16
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK-LABEL: {{^}}store_local_i16:
+; SI-CHECK: ds_write_b16
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
store i16 %in, i16 addrspace(3)* %out
ret void
}
-; EG-CHECK-LABEL: @store_local_v2i16
+; EG-CHECK-LABEL: {{^}}store_local_v2i16:
; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v2i16
+; CM-CHECK-LABEL: {{^}}store_local_v2i16:
; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v2i16
-; SI-CHECK: DS_WRITE_B16
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK-LABEL: {{^}}store_local_v2i16:
+; SI-CHECK: ds_write_b16
+; SI-CHECK: ds_write_b16
define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(3)* %out
ret void
}
-; EG-CHECK-LABEL: @store_local_v4i8
+; EG-CHECK-LABEL: {{^}}store_local_v4i8:
; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v4i8
+; CM-CHECK-LABEL: {{^}}store_local_v4i8:
; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v4i8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK-LABEL: {{^}}store_local_v4i8:
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(3)* %out
ret void
}
-; EG-CHECK-LABEL: @store_local_v2i32
+; EG-CHECK-LABEL: {{^}}store_local_v2i32:
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v2i32
+; CM-CHECK-LABEL: {{^}}store_local_v2i32:
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v2i32
-; SI-CHECK: DS_WRITE_B64
+; SI-CHECK-LABEL: {{^}}store_local_v2i32:
+; SI-CHECK: ds_write_b64
define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
store <2 x i32> %in, <2 x i32> addrspace(3)* %out
ret void
}
-; EG-CHECK-LABEL: @store_local_v4i32
+; EG-CHECK-LABEL: {{^}}store_local_v4i32:
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v4i32
+; CM-CHECK-LABEL: {{^}}store_local_v4i32:
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v4i32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
+; SI-CHECK-LABEL: {{^}}store_local_v4i32:
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(3)* %out
ret void
}
-; FUNC-LABEL: @store_local_i64_i8
+; FUNC-LABEL: {{^}}store_local_i64_i8:
; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i8
@@ -301,9 +308,9 @@ entry:
ret void
}
-; FUNC-LABEL: @store_local_i64_i16
+; FUNC-LABEL: {{^}}store_local_i64_i16:
; EG-CHECK: LDS_SHORT_WRITE
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: ds_write_b16
define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
entry:
%0 = trunc i64 %in to i16
@@ -318,12 +325,12 @@ entry:
; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
; be two 32-bit stores.
-; EG-CHECK-LABEL: @vecload2
+; EG-CHECK-LABEL: {{^}}vecload2:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @vecload2
+; CM-CHECK-LABEL: {{^}}vecload2:
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @vecload2
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK-LABEL: {{^}}vecload2:
+; SI-CHECK: buffer_store_dwordx2
define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
%0 = load i32 addrspace(2)* %mem, align 4
@@ -339,7 +346,7 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
; When i128 was a legal type this program generated cannot select errors:
-; FUNC-LABEL: @i128-const-store
+; FUNC-LABEL: {{^}}"i128-const-store":
; FIXME: We should be able to to this with one store instruction
; EG-CHECK: STORE_RAW
; EG-CHECK: STORE_RAW
@@ -349,8 +356,8 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
; CM-CHECK: STORE_DWORD
; CM-CHECK: STORE_DWORD
; CM-CHECK: STORE_DWORD
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
define void @i128-const-store(i32 addrspace(1)* %out) {
entry:
store i32 1, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll
index 00589a0c6c86..3df30d4c6696 100644
--- a/test/CodeGen/R600/store.r600.ll
+++ b/test/CodeGen/R600/store.r600.ll
@@ -3,7 +3,7 @@
; XXX: Merge this test into store.ll once it is supported on SI
; v4i32 store
-; EG-CHECK: @store_v4i32
+; EG-CHECK: {{^}}store_v4i32:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
@@ -13,7 +13,7 @@ define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %
}
; v4f32 store
-; EG-CHECK: @store_v4f32
+; EG-CHECK: {{^}}store_v4f32:
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%1 = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/structurize.ll b/test/CodeGen/R600/structurize.ll
index c2acd938ad05..02e592e9a559 100644
--- a/test/CodeGen/R600/structurize.ll
+++ b/test/CodeGen/R600/structurize.ll
@@ -13,7 +13,7 @@
;
;
-; CHECK-LABEL: @branch_into_diamond
+; CHECK-LABEL: {{^}}branch_into_diamond:
; === entry block:
; CHECK: ALU_PUSH_BEFORE
; === Branch instruction (IF):
diff --git a/test/CodeGen/R600/structurize1.ll b/test/CodeGen/R600/structurize1.ll
index 8c10301a1686..77432c1f9d2b 100644
--- a/test/CodeGen/R600/structurize1.ll
+++ b/test/CodeGen/R600/structurize1.ll
@@ -16,7 +16,7 @@
; }
; }
-; CHECK-LABEL: @if_inside_loop
+; CHECK-LABEL: {{^}}if_inside_loop:
; CHECK: LOOP_START_DX10
; CHECK: END_LOOP
define void @if_inside_loop(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index 8e64148142d2..be48e186e870 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -1,16 +1,31 @@
-;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
declare i32 @llvm.r600.read.tidig.x() readnone
-;FUNC-LABEL: @test2
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: {{^}}test_sub_i32:
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %result = sub i32 %a, %b
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}test_sub_v2i32:
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32> addrspace(1) * %in
%b = load <2 x i32> addrspace(1) * %b_ptr
@@ -19,18 +34,18 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;FUNC-LABEL: @test4
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: {{^}}test_sub_v4i32:
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32> addrspace(1) * %in
%b = load <4 x i32> addrspace(1) * %b_ptr
@@ -39,9 +54,9 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
ret void
}
-; FUNC-LABEL: @s_sub_i64:
-; SI: S_SUB_I32
-; SI: S_SUBB_U32
+; FUNC-LABEL: {{^}}s_sub_i64:
+; SI: s_sub_u32
+; SI: s_subb_u32
; EG-DAG: SETGE_UINT
; EG-DAG: CNDE_INT
@@ -54,9 +69,9 @@ define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind
ret void
}
-; FUNC-LABEL: @v_sub_i64:
-; SI: V_SUB_I32_e32
-; SI: V_SUBB_U32_e32
+; FUNC-LABEL: {{^}}v_sub_i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
; EG-DAG: SETGE_UINT
; EG-DAG: CNDE_INT
@@ -73,3 +88,39 @@ define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
+
+; FUNC-LABEL: {{^}}v_test_sub_v2i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
+ %tid = call i32 @llvm.r600.read.tidig.x() readnone
+ %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <2 x i64> addrspace(1)* %a_ptr
+ %b = load <2 x i64> addrspace(1)* %b_ptr
+ %result = sub <2 x i64> %a, %b
+ store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_test_sub_v4i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+define void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
+ %tid = call i32 @llvm.r600.read.tidig.x() readnone
+ %a_ptr = getelementptr <4 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <4 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <4 x i64> addrspace(1)* %a_ptr
+ %b = load <4 x i64> addrspace(1)* %b_ptr
+ %result = sub <4 x i64> %a, %b
+ store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/subreg-coalescer-crash.ll b/test/CodeGen/R600/subreg-coalescer-crash.ll
new file mode 100644
index 000000000000..e841637b5047
--- /dev/null
+++ b/test/CodeGen/R600/subreg-coalescer-crash.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -o - %s
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+target triple = "r600--"
+
+; SI: s_endpgm
+; Function Attrs: nounwind
+define void @row_filter_C1_D0() #0 {
+entry:
+ br i1 undef, label %for.inc.1, label %do.body.preheader
+
+do.body.preheader: ; preds = %entry
+ %0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
+ br i1 undef, label %do.body56.1, label %do.body90
+
+do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
+ %1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ]
+ %2 = insertelement <4 x i32> %1, i32 undef, i32 2
+ %3 = insertelement <4 x i32> %2, i32 undef, i32 3
+ br i1 undef, label %do.body124.1, label %do.body.1562.preheader
+
+do.body.1562.preheader: ; preds = %do.body124.1, %do.body90
+ %storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ]
+ %4 = insertelement <4 x i32> undef, i32 undef, i32 1
+ br label %for.inc.1
+
+do.body56.1: ; preds = %do.body.preheader
+ %5 = insertelement <4 x i32> %0, i32 undef, i32 1
+ %or.cond472.1 = or i1 undef, undef
+ br i1 %or.cond472.1, label %do.body56.2, label %do.body90
+
+do.body56.2: ; preds = %do.body56.1
+ %6 = insertelement <4 x i32> %5, i32 undef, i32 1
+ br label %do.body90
+
+do.body124.1: ; preds = %do.body90
+ %7 = insertelement <4 x i32> %3, i32 undef, i32 3
+ br label %do.body.1562.preheader
+
+for.inc.1: ; preds = %do.body.1562.preheader, %entry
+ %storemerge591 = phi <4 x i32> [ zeroinitializer, %entry ], [ %storemerge, %do.body.1562.preheader ]
+ %add.i495 = add <4 x i32> %storemerge591, undef
+ unreachable
+}
+
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll
index 16c3f191935c..3e6f7a732630 100644
--- a/test/CodeGen/R600/swizzle-export.ll
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;EG-CHECK: @main
+;EG-CHECK: {{^}}main:
;EG-CHECK: EXPORT T{{[0-9]+}}.XYXX
;EG-CHECK: EXPORT T{{[0-9]+}}.ZXXX
;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX
@@ -92,9 +92,9 @@ main_body:
ret void
}
-; EG-CHECK: @main2
+; EG-CHECK: {{^}}main2:
; EG-CHECK: T{{[0-9]+}}.XY__
-; EG-CHECK: T{{[0-9]+}}.YXZ0
+; EG-CHECK: T{{[0-9]+}}.ZXY0
define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
diff --git a/test/CodeGen/R600/trunc-cmp-constant.ll b/test/CodeGen/R600/trunc-cmp-constant.ll
new file mode 100644
index 000000000000..73c35512d77c
--- /dev/null
+++ b/test/CodeGen/R600/trunc-cmp-constant.ll
@@ -0,0 +1,169 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
+; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1{{$}}
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_byte
+define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ %cmp = icmp eq i32 %ext, 0
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: The negate should be inverting the compare.
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ %cmp = icmp eq i32 %ext, 0
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ %cmp = icmp eq i32 %ext, 1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ %cmp = icmp eq i32 %ext, 1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ %cmp = icmp eq i32 %ext, -1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ %cmp = icmp eq i32 %ext, -1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+
+; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ %cmp = icmp ne i32 %ext, 0
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ %cmp = icmp ne i32 %ext, 0
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ %cmp = icmp ne i32 %ext, 1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ %cmp = icmp ne i32 %ext, 1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: This should be one compare.
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
+; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; XSI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
+; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
+; XSI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ %cmp = icmp ne i32 %ext, -1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ %cmp = icmp ne i32 %ext, -1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
+; SI: buffer_load_sbyte [[LOAD:v[0-9]+]]
+; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[LOAD]], -1{{$}}
+; SI-NEXT: v_cndmask_b32_e64
+; SI-NEXT: buffer_store_byte
+define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %load = load i8 addrspace(1)* %in
+ %masked = and i8 %load, 255
+ %ext = sext i8 %masked to i32
+ %cmp = icmp ne i32 %ext, -1
+ store i1 %cmp, i1 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll
index a3975c8b8e4c..83b546f93a4a 100644
--- a/test/CodeGen/R600/trunc-store-i1.ll
+++ b/test/CodeGen/R600/trunc-store-i1.ll
@@ -1,30 +1,30 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @global_truncstore_i32_to_i1
-; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
-; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: BUFFER_STORE_BYTE [[VREG]],
+; SI-LABEL: {{^}}global_truncstore_i32_to_i1:
+; SI: s_load_dword [[LOAD:s[0-9]+]],
+; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
+; SI: buffer_store_byte [[VREG]],
define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind {
%trunc = trunc i32 %val to i1
store i1 %trunc, i1 addrspace(1)* %out, align 1
ret void
}
-; SI-LABEL: @global_truncstore_i64_to_i1
-; SI: BUFFER_STORE_BYTE
+; SI-LABEL: {{^}}global_truncstore_i64_to_i1:
+; SI: buffer_store_byte
define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwind {
%trunc = trunc i64 %val to i1
store i1 %trunc, i1 addrspace(1)* %out, align 1
ret void
}
-; SI-LABEL: @global_truncstore_i16_to_i1
-; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
-; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: BUFFER_STORE_BYTE [[VREG]],
+; SI-LABEL: {{^}}global_truncstore_i16_to_i1:
+; SI: s_load_dword [[LOAD:s[0-9]+]],
+; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
+; SI: buffer_store_byte [[VREG]],
define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
%trunc = trunc i16 %val to i1
store i1 %trunc, i1 addrspace(1)* %out, align 1
diff --git a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
index ec959c21798a..878ea3f48995 100644
--- a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
+++ b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
@@ -4,7 +4,7 @@
; vector stores at the end of a basic block were not being added to the
; LegalizedNodes list, which triggered an assertion failure.
-; CHECK-LABEL: @test
+; CHECK-LABEL: {{^}}test:
; CHECK: MEM_RAT_CACHELESS STORE_RAW
define void @test(<4 x i8> addrspace(1)* %out, i32 %cond, <4 x i8> %in) {
entry:
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index 31cdfcd1a884..a6f902f24976 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
-; SI-LABEL: @trunc_i64_to_i32_store
-; SI: S_LOAD_DWORD s0, s[0:1], 0xb
-; SI: V_MOV_B32_e32 v0, s0
-; SI: BUFFER_STORE_DWORD v0
+; SI-LABEL: {{^}}trunc_i64_to_i32_store:
+; SI: s_load_dword [[SLOAD:s[0-9]+]], s[0:1], 0xb
+; SI: v_mov_b32_e32 [[VLOAD:v[0-9]+]], [[SLOAD]]
+; SI: buffer_store_dword [[VLOAD]]
-; EG-LABEL: @trunc_i64_to_i32_store
+; EG-LABEL: {{^}}trunc_i64_to_i32_store:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG: LSHR
; EG-NEXT: 2(
@@ -16,12 +16,12 @@ define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
ret void
}
-; SI-LABEL: @trunc_load_shl_i64:
-; SI-DAG: S_LOAD_DWORDX2
-; SI-DAG: S_LOAD_DWORD [[SREG:s[0-9]+]],
-; SI: S_LSHL_B32 [[SHL:s[0-9]+]], [[SREG]], 2
-; SI: V_MOV_B32_e32 [[VSHL:v[0-9]+]], [[SHL]]
-; SI: BUFFER_STORE_DWORD [[VSHL]],
+; SI-LABEL: {{^}}trunc_load_shl_i64:
+; SI-DAG: s_load_dwordx2
+; SI-DAG: s_load_dword [[SREG:s[0-9]+]],
+; SI: s_lshl_b32 [[SHL:s[0-9]+]], [[SREG]], 2
+; SI: v_mov_b32_e32 [[VSHL:v[0-9]+]], [[SHL]]
+; SI: buffer_store_dword [[VSHL]],
define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
%b = shl i64 %a, 2
%result = trunc i64 %b to i32
@@ -29,12 +29,13 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
ret void
}
-; SI-LABEL: @trunc_shl_i64:
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}},
-; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]],
-; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
-; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
-; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
+; SI-LABEL: {{^}}trunc_shl_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
+; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
+; SI: s_addc_u32
+; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
+; SI: buffer_store_dword v[[LO_VREG]],
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
%aa = add i64 %a, 234 ; Prevent shrinking store.
%b = shl i64 %aa, 2
@@ -44,10 +45,21 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
ret void
}
-; SI-LABEL: @trunc_i32_to_i1:
-; SI: V_AND_B32
-; SI: V_CMP_EQ_I32
-define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+; SI-LABEL: {{^}}trunc_i32_to_i1:
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: v_cmp_eq_i32
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
+ %a = load i32 addrspace(1)* %ptr, align 4
+ %trunc = trunc i32 %a to i1
+ %result = select i1 %trunc, i32 1, i32 0
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
+; SI: v_and_b32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: v_cmp_eq_i32
+define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
%trunc = trunc i32 %a to i1
%result = select i1 %trunc, i32 1, i32 0
store i32 %result, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll
index a80e502eef2a..ac2960412503 100644
--- a/test/CodeGen/R600/uaddo.ll
+++ b/test/CodeGen/R600/uaddo.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-; FUNC-LABEL: @uaddo_i64_zext
-; SI: ADD
-; SI: ADDC
-; SI: ADDC
+; FUNC-LABEL: {{^}}uaddo_i64_zext:
+; SI: add
+; SI: addc
+; SI: addc
define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
@@ -18,8 +18,8 @@ define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; FUNC-LABEL: @s_uaddo_i32
-; SI: S_ADD_I32
+; FUNC-LABEL: {{^}}s_uaddo_i32:
+; SI: s_add_i32
define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %uadd, 0
@@ -29,8 +29,8 @@ define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @v_uaddo_i32
-; SI: V_ADD_I32
+; FUNC-LABEL: {{^}}v_uaddo_i32:
+; SI: v_add_i32
define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
@@ -42,9 +42,9 @@ define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @s_uaddo_i64
-; SI: S_ADD_I32
-; SI: S_ADDC_U32
+; FUNC-LABEL: {{^}}s_uaddo_i64:
+; SI: s_add_u32
+; SI: s_addc_u32
define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
@@ -54,9 +54,9 @@ define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
ret void
}
-; FUNC-LABEL: @v_uaddo_i64
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; FUNC-LABEL: {{^}}v_uaddo_i64:
+; SI: v_add_i32
+; SI: v_addc_u32
define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll
index 53713217a1c0..e9a6155da6fb 100644
--- a/test/CodeGen/R600/udiv.ll
+++ b/test/CodeGen/R600/udiv.ll
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-;EG-CHECK-LABEL: @test
+;EG-CHECK-LABEL: {{^}}test:
;EG-CHECK-NOT: SETGE_INT
;EG-CHECK: CF_END
@@ -18,10 +18,10 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;The goal of this test is to make sure the ISel doesn't fail when it gets
;a v4i32 udiv
-;EG-CHECK-LABEL: @test2
+;EG-CHECK-LABEL: {{^}}test2:
;EG-CHECK: CF_END
-;SI-CHECK-LABEL: @test2
-;SI-CHECK: S_ENDPGM
+;SI-CHECK-LABEL: {{^}}test2:
+;SI-CHECK: s_endpgm
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -32,10 +32,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;EG-CHECK-LABEL: @test4
+;EG-CHECK-LABEL: {{^}}test4:
;EG-CHECK: CF_END
-;SI-CHECK-LABEL: @test4
-;SI-CHECK: S_ENDPGM
+;SI-CHECK-LABEL: {{^}}test4:
+;SI-CHECK: s_endpgm
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/udivrem.ll b/test/CodeGen/R600/udivrem.ll
index 5f5753adca3f..2254a2abd5b9 100644
--- a/test/CodeGen/R600/udivrem.ll
+++ b/test/CodeGen/R600/udivrem.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-; FUNC-LABEL: @test_udivrem
+; FUNC-LABEL: {{^}}test_udivrem:
; EG: RECIP_UINT
; EG-DAG: MULHI
; EG-DAG: MULLO_INT
@@ -26,30 +26,30 @@
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI: V_RCP_IFLAG_F32_e32 [[RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[RCP_HI:v[0-9]+]], [[RCP]]
-; SI-DAG: V_MUL_LO_I32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: V_SUB_I32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
-; SI: V_CNDMASK_B32_e64
-; SI: V_MUL_HI_U32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: V_ADD_I32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
-; SI: V_CNDMASK_B32_e64
-; SI: V_MUL_HI_U32 [[Quotient:v[0-9]+]]
-; SI: V_MUL_LO_I32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
+; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI: v_cndmask_b32_e64
+; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI: v_cndmask_b32_e64
+; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
+; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
%result0 = udiv i32 %x, %y
store i32 %result0, i32 addrspace(1)* %out
@@ -58,7 +58,7 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
ret void
}
-; FUNC-LABEL: @test_udivrem_v2
+; FUNC-LABEL: {{^}}test_udivrem_v2:
; EG-DAG: RECIP_UINT
; EG-DAG: MULHI
; EG-DAG: MULLO_INT
@@ -106,53 +106,53 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
%result0 = udiv <2 x i32> %x, %y
store <2 x i32> %result0, <2 x i32> addrspace(1)* %out
@@ -162,7 +162,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
}
-; FUNC-LABEL: @test_udivrem_v4
+; FUNC-LABEL: {{^}}test_udivrem_v4:
; EG-DAG: RECIP_UINT
; EG-DAG: MULHI
; EG-DAG: MULLO_INT
@@ -256,99 +256,99 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[THIRD_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[THIRD_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[THIRD_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FOURTH_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FOURTH_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
+; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FOURTH_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FOURTH_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
%result0 = udiv <4 x i32> %x, %y
store <4 x i32> %result0, <4 x i32> addrspace(1)* %out
diff --git a/test/CodeGen/R600/udivrem24.ll b/test/CodeGen/R600/udivrem24.ll
new file mode 100644
index 000000000000..0e15c0730c04
--- /dev/null
+++ b/test/CodeGen/R600/udivrem24.ll
@@ -0,0 +1,244 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}udiv24_i8:
+; SI: v_cvt_f32_ubyte
+; SI: v_cvt_f32_ubyte
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+ %num = load i8 addrspace(1) * %in
+ %den = load i8 addrspace(1) * %den_ptr
+ %result = udiv i8 %num, %den
+ store i8 %result, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i16:
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+ %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+ %num = load i16 addrspace(1) * %in, align 2
+ %den = load i16 addrspace(1) * %den_ptr, align 2
+ %result = udiv i16 %num, %den
+ store i16 %result, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i32:
+; SI: v_cvt_f32_u32
+; SI-DAG: v_cvt_f32_u32
+; SI-DAG: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = lshr i32 %num.i24.0, 8
+ %den.i24 = lshr i32 %den.i24.0, 8
+ %result = udiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}udiv25_i32:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = lshr i32 %num.i24.0, 7
+ %den.i24 = lshr i32 %den.i24.0, 7
+ %result = udiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = lshr i32 %num.i24.0, 8
+ %den.i24 = lshr i32 %den.i24.0, 7
+ %result = udiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = lshr i32 %num.i24.0, 7
+ %den.i24 = lshr i32 %den.i24.0, 8
+ %result = udiv i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}urem24_i8:
+; SI: v_cvt_f32_ubyte
+; SI: v_cvt_f32_ubyte
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+ %num = load i8 addrspace(1) * %in
+ %den = load i8 addrspace(1) * %den_ptr
+ %result = urem i8 %num, %den
+ store i8 %result, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}urem24_i16:
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+ %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+ %num = load i16 addrspace(1) * %in, align 2
+ %den = load i16 addrspace(1) * %den_ptr, align 2
+ %result = urem i16 %num, %den
+ store i16 %result, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}urem24_i32:
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = lshr i32 %num.i24.0, 8
+ %den.i24 = lshr i32 %den.i24.0, 8
+ %result = urem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}urem25_i32:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = lshr i32 %num.i24.0, 7
+ %den.i24 = lshr i32 %den.i24.0, 7
+ %result = urem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 8
+ %den.i24.0 = shl i32 %den, 7
+ %num.i24 = lshr i32 %num.i24.0, 8
+ %den.i24 = lshr i32 %den.i24.0, 7
+ %result = urem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in, align 4
+ %den = load i32 addrspace(1) * %den_ptr, align 4
+ %num.i24.0 = shl i32 %num, 7
+ %den.i24.0 = shl i32 %den, 8
+ %num.i24 = lshr i32 %num.i24.0, 7
+ %den.i24 = lshr i32 %den.i24.0, 8
+ %result = urem i32 %num.i24, %den.i24
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll
index a71315a12d80..a04745859879 100644
--- a/test/CodeGen/R600/udivrem64.ll
+++ b/test/CodeGen/R600/udivrem64.ll
@@ -1,7 +1,7 @@
-;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;XUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-;FUNC-LABEL: @test_udiv
+;FUNC-LABEL: {{^}}test_udiv:
;EG: RECIP_UINT
;EG: LSHL {{.*}}, 1,
;EG: BFE_UINT
@@ -34,14 +34,14 @@
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
-;SI: S_ENDPGM
+;SI: s_endpgm
define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = udiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
}
-;FUNC-LABEL: @test_urem
+;FUNC-LABEL: {{^}}test_urem:
;EG: RECIP_UINT
;EG: BFE_UINT
;EG: BFE_UINT
@@ -74,7 +74,7 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
-;SI: S_ENDPGM
+;SI: s_endpgm
define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
index 9a41796a06bf..09e987dd14da 100644
--- a/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -1,35 +1,96 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @uint_to_fp_f64_i32
-; SI: V_CVT_F64_U32_e32
-; SI: S_ENDPGM
-define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) {
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
+; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
+ %val = load i64 addrspace(1)* %gep, align 8
+ %result = uitofp i64 %val to double
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_i64_to_f64
+define void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
+ %cast = uitofp i64 %in to double
+ store double %cast, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_v2i64_to_v2f64
+define void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i64> %in) {
+ %cast = uitofp <2 x i64> %in to <2 x double>
+ store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_v4i64_to_v4f64
+define void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %in) {
+ %cast = uitofp <4 x i64> %in to <4 x double>
+ store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_i32_to_f64
+; SI: v_cvt_f64_u32_e32
+; SI: s_endpgm
+define void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
%cast = uitofp i32 %in to double
store double %cast, double addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: @uint_to_fp_i1_f64:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
-; we should be able to fold the SGPRs into the V_CNDMASK instructions.
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+; SI-LABEL: {{^}}s_uint_to_fp_v2i32_to_v2f64
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: s_endpgm
+define void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) {
+ %cast = uitofp <2 x i32> %in to <2 x double>
+ store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_v4i32_to_v4f64
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: s_endpgm
+define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> %in) {
+ %cast = uitofp <4 x i32> %in to <4 x double>
+ store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
+; uses an SGPR for [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to double
store double %fp, double addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: @uint_to_fp_i1_f64_load:
-; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, 1
-; SI-NEXT: V_CVT_F64_U32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+; SI-LABEL: {{^}}uint_to_fp_i1_to_f64_load:
+; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, 1
+; SI-NEXT: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, i1 %in) {
%fp = uitofp i1 %in to double
store double %fp, double addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index 8f5d42d42c6b..1c5e487724bc 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -1,69 +1,80 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; FUNC-LABEL: @uint_to_fp_v2i32
+; FUNC-LABEL: {{^}}uint_to_fp_i32_to_f32:
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
+define void @uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
+ %result = uitofp i32 %in to float
+ store float %result, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}uint_to_fp_v2i32_to_v2f32:
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
-define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
+define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
%result = uitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @uint_to_fp_v4i32
+; FUNC-LABEL: {{^}}uint_to_fp_v4i32_to_v4f32:
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
-define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
+define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%value = load <4 x i32> addrspace(1) * %in
%result = uitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @uint_to_fp_i64_f32
+; FUNC-LABEL: {{^}}uint_to_fp_i64_to_f32:
; R600: UINT_TO_FLT
; R600: UINT_TO_FLT
; R600: MULADD_IEEE
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_MAD_F32
-; SI: S_ENDPGM
-define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) {
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_mad_f32
+; SI: s_endpgm
+define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) {
entry:
%0 = uitofp i64 %in to float
store float %0, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: @uint_to_fp_i1_f32:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00, [[CMP]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
+; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
store float %fp, float addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: @uint_to_fp_i1_f32_load:
-; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
+; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32_load:
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) {
%fp = uitofp i1 %in to float
store float %fp, float addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 4df69d1e5f16..47fba78544d9 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -1,17 +1,112 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-; SI-LABEL: @unaligned_load_store_i32:
-; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
+; SI-LABEL: {{^}}unaligned_load_store_i32:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_write_b32
+; SI: s_endpgm
define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
%v = load i32 addrspace(3)* %p, align 1
store i32 %v, i32 addrspace(3)* %r, align 1
ret void
}
-; SI-LABEL: @unaligned_load_store_v4i32:
-; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
+; SI-LABEL: {{^}}unaligned_load_store_v4i32:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: s_endpgm
define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
%v = load <4 x i32> addrspace(3)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
ret void
}
+
+; SI-LABEL: {{^}}load_lds_i64_align_4:
+; SI: ds_read2_b32
+; SI: s_endpgm
+define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+ %val = load i64 addrspace(3)* %in, align 4
+ store i64 %val, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
+; SI: s_endpgm
+define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+ %ptr = getelementptr i64 addrspace(3)* %in, i32 4
+ %val = load i64 addrspace(3)* %ptr, align 4
+ store i64 %val, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
+; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
+; SI: s_endpgm
+define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+ %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
+ %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+ %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
+ %val = load i64 addrspace(3)* %ptri64, align 4
+ store i64 %val, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FIXME: Need to fix this case.
+; define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; %val = load i64 addrspace(3)* %in, align 1
+; store i64 %val, i64 addrspace(1)* %out, align 8
+; ret void
+; }
+
+; SI-LABEL: {{^}}store_lds_i64_align_4:
+; SI: ds_write2_b32
+; SI: s_endpgm
+define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
+ store i64 %val, i64 addrspace(3)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
+; SI: s_endpgm
+define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
+ %ptr = getelementptr i64 addrspace(3)* %out, i32 4
+ store i64 0, i64 addrspace(3)* %ptr, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
+; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_endpgm
+define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
+ %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
+ %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+ %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
+ store i64 0, i64 addrspace(3)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
index e4129c511230..9faeed6d6565 100644
--- a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
+++ b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
@@ -1,11 +1,11 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
+; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s
; SI hits an assertion at -O0, evergreen hits a not implemented unreachable.
-; COMMON-LABEL: @branch_true:
+; COMMON-LABEL: {{^}}branch_true:
define void @branch_true(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
entry:
br i1 true, label %for.end, label %for.body.lr.ph
@@ -39,9 +39,9 @@ for.end: ; preds = %for.body, %entry
ret void
}
-; COMMON-LABEL: @branch_false:
+; COMMON-LABEL: {{^}}branch_false:
; SI: .text
-; SI-NEXT: S_ENDPGM
+; SI-NEXT: s_endpgm
define void @branch_false(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
entry:
br i1 false, label %for.end, label %for.body.lr.ph
@@ -75,9 +75,9 @@ for.end: ; preds = %for.body, %entry
ret void
}
-; COMMON-LABEL: @branch_undef:
+; COMMON-LABEL: {{^}}branch_undef:
; SI: .text
-; SI-NEXT: S_ENDPGM
+; SI-NEXT: s_endpgm
define void @branch_undef(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
entry:
br i1 undef, label %for.end, label %for.body.lr.ph
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
index f986a0251dce..8ab4faf2f145 100644
--- a/test/CodeGen/R600/unsupported-cc.ll
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -2,7 +2,7 @@
; These tests are for condition codes that are not supported by the hardware
-; CHECK-LABEL: @slt
+; CHECK-LABEL: {{^}}slt:
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5(7.006492e-45)
@@ -14,7 +14,7 @@ entry:
ret void
}
-; CHECK-LABEL: @ult_i32
+; CHECK-LABEL: {{^}}ult_i32:
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5(7.006492e-45)
@@ -26,7 +26,7 @@ entry:
ret void
}
-; CHECK-LABEL: @ult_float
+; CHECK-LABEL: {{^}}ult_float:
; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
; CHECK-NEXT: 1084227584(5.000000e+00)
; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
@@ -39,7 +39,7 @@ entry:
ret void
}
-; CHECK-LABEL: @ult_float_native
+; CHECK-LABEL: {{^}}ult_float_native:
; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR *
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -51,7 +51,7 @@ entry:
ret void
}
-; CHECK-LABEL: @olt
+; CHECK-LABEL: {{^}}olt:
; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR *
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -63,7 +63,7 @@ entry:
ret void
}
-; CHECK-LABEL: @sle
+; CHECK-LABEL: {{^}}sle:
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 6(8.407791e-45)
@@ -75,7 +75,7 @@ entry:
ret void
}
-; CHECK-LABEL: @ule_i32
+; CHECK-LABEL: {{^}}ule_i32:
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 6(8.407791e-45)
@@ -87,7 +87,7 @@ entry:
ret void
}
-; CHECK-LABEL: @ule_float
+; CHECK-LABEL: {{^}}ule_float:
; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
; CHECK-NEXT: 1084227584(5.000000e+00)
; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
@@ -100,7 +100,7 @@ entry:
ret void
}
-; CHECK-LABEL: @ule_float_native
+; CHECK-LABEL: {{^}}ule_float_native:
; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR *
; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -112,7 +112,7 @@ entry:
ret void
}
-; CHECK-LABEL: @ole
+; CHECK-LABEL: {{^}}ole:
; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR *
; CHECK-NEXT:1084227584(5.000000e+00)
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll
index e808e3d2cf19..132f0009cda1 100644
--- a/test/CodeGen/R600/urecip.ll
+++ b/test/CodeGen/R600/urecip.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_RCP_IFLAG_F32_e32
+;CHECK: v_rcp_iflag_f32_e32
define void @test(i32 %p, i32 %q) {
%i = udiv i32 %p, %q
diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll
index 8045145bd10d..daa32446146b 100644
--- a/test/CodeGen/R600/urem.ll
+++ b/test/CodeGen/R600/urem.ll
@@ -1,34 +1,78 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-;The code generated by urem is long and complex and may frequently change.
-;The goal of this test is to make sure the ISel doesn't fail when it gets
-;a v2i32/v4i32 urem
+; The code generated by urem is long and complex and may frequently
+; change. The goal of this test is to make sure the ISel doesn't fail
+; when it gets a v2i32/v4i32 urem
-;EG-CHECK: @test2
-;EG-CHECK: CF_END
-;SI-CHECK: @test2
-;SI-CHECK: S_ENDPGM
+; FUNC-LABEL: {{^}}test_urem_i32:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %result = urem i32 %a, %b
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}test_urem_v2i32:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %a = load <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32> addrspace(1)* %b_ptr
%result = urem <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
-;EG-CHECK: @test4
-;EG-CHECK: CF_END
-;SI-CHECK: @test4
-;SI-CHECK: S_ENDPGM
-
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}test_urem_v4i32:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %a = load <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32> addrspace(1)* %b_ptr
%result = urem <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: {{^}}test_urem_i64:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+ %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
+ %a = load i64 addrspace(1)* %in
+ %b = load i64 addrspace(1)* %b_ptr
+ %result = urem i64 %a, %b
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_urem_v2i64:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+ %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64> addrspace(1)* %in
+ %b = load <2 x i64> addrspace(1)* %b_ptr
+ %result = urem <2 x i64> %a, %b
+ store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_urem_v4i64:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64> addrspace(1)* %in
+ %b = load <4 x i64> addrspace(1)* %b_ptr
+ %result = urem <4 x i64> %a, %b
+ store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/use-sgpr-multiple-times.ll b/test/CodeGen/R600/use-sgpr-multiple-times.ll
new file mode 100644
index 000000000000..97d73ba74bc5
--- /dev/null
+++ b/test/CodeGen/R600/use-sgpr-multiple-times.ll
@@ -0,0 +1,96 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
+
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
+; SI: s_load_dword [[SGPR:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
+ %dbl = fadd float %a, %a
+ store float %dbl, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
+; SI: s_load_dword [[SGPR:s[0-9]+]],
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
+ store float %fma, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
+ store float %fma, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
+ %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
+ store float %fma, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
+ %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
+ store float %fma, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
+ store float %fma, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
+ %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
+ store float %fma, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
+ %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
+ store i32 %fma, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll
index d57a2c7f773e..1af595421b84 100644
--- a/test/CodeGen/R600/usubo.ll
+++ b/test/CodeGen/R600/usubo.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
-; FUNC-LABEL: @usubo_i64_zext
+; FUNC-LABEL: {{^}}usubo_i64_zext:
define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
@@ -15,8 +15,8 @@ define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
ret void
}
-; FUNC-LABEL: @s_usubo_i32
-; SI: S_SUB_I32
+; FUNC-LABEL: {{^}}s_usubo_i32:
+; SI: s_sub_i32
define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %usub, 0
@@ -26,8 +26,8 @@ define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @v_usubo_i32
-; SI: V_SUBREV_I32_e32
+; FUNC-LABEL: {{^}}v_usubo_i32:
+; SI: v_subrev_i32_e32
define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%a = load i32 addrspace(1)* %aptr, align 4
%b = load i32 addrspace(1)* %bptr, align 4
@@ -39,9 +39,9 @@ define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
ret void
}
-; FUNC-LABEL: @s_usubo_i64
-; SI: S_SUB_I32
-; SI: S_SUBB_U32
+; FUNC-LABEL: {{^}}s_usubo_i64:
+; SI: s_sub_u32
+; SI: s_subb_u32
define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
@@ -51,9 +51,9 @@ define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
ret void
}
-; FUNC-LABEL: @v_usubo_i64
-; SI: V_SUB_I32
-; SI: V_SUBB_U32
+; FUNC-LABEL: {{^}}v_usubo_i64:
+; SI: v_sub_i32
+; SI: v_subb_u32
define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%a = load i64 addrspace(1)* %aptr, align 4
%b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/v1i64-kernel-arg.ll b/test/CodeGen/R600/v1i64-kernel-arg.ll
index 2aa1221b366e..31755125c03b 100644
--- a/test/CodeGen/R600/v1i64-kernel-arg.ll
+++ b/test/CodeGen/R600/v1i64-kernel-arg.ll
@@ -2,14 +2,14 @@
; XFAIL: *
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
-; CHECK-LABEL: @kernel_arg_i64
+; CHECK-LABEL: {{^}}kernel_arg_i64:
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}
; i64 arg works, v1i64 arg does not.
-; CHECK-LABEL: @kernel_arg_v1i64
+; CHECK-LABEL: {{^}}kernel_arg_v1i64:
define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll
index 84087ee78d59..410e1b5a47a7 100644
--- a/test/CodeGen/R600/v_cndmask.ll
+++ b/test/CodeGen/R600/v_cndmask.ll
@@ -1,14 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; SI: @v_cnd_nan
-; SI: V_CNDMASK_B32_e64 v{{[0-9]}},
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; SI-LABEL: {{^}}v_cnd_nan_nosgpr:
+; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
; SI-DAG: v{{[0-9]}}
; All nan values are converted to 0xffffffff
-; SI-DAG: -1
-define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) {
-entry:
- %0 = icmp ne i32 %c, 0
- %1 = select i1 %0, float 0xFFFFFFFFE0000000, float %f
- store float %1, float addrspace(1)* %out
+; SI: s_endpgm
+define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
+ %idx = call i32 @llvm.r600.read.tidig.x() #1
+ %f.gep = getelementptr float addrspace(1)* %fptr, i32 %idx
+ %f = load float addrspace(1)* %fptr
+ %setcc = icmp ne i32 %c, 0
+ %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
+ store float %select, float addrspace(1)* %out
ret void
}
+
+
+; This requires slightly trickier SGPR operand legalization since the
+; single constant bus SGPR usage is the last operand, and it should
+; never be moved.
+
+; SI-LABEL: {{^}}v_cnd_nan:
+; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
+; SI-DAG: v{{[0-9]}}
+; All nan values are converted to 0xffffffff
+; SI: s_endpgm
+define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
+ %setcc = icmp ne i32 %c, 0
+ %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
+ store float %select, float addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll
index 5d5e3ff63a47..a4027178431b 100644
--- a/test/CodeGen/R600/valu-i1.ll
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -1,10 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: @test_if
; Make sure the i1 values created by the cfg structurizer pass are
; moved using VALU instructions
-; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
-; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
-define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
+; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
+; SI: v_mov_b32_e32 v{{[0-9]}}, -1
+define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
entry:
switch i32 %a, label %default [
i32 0, label %case0
@@ -37,3 +40,150 @@ else:
end:
ret void
}
+
+; SI-LABEL: @simple_test_v_if
+; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
+; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+
+; SI: ; BB#1
+; SI: buffer_store_dword
+; SI: s_endpgm
+
+; SI: BB1_2:
+; SI: s_or_b64 exec, exec, [[BR_SREG]]
+; SI: s_endpgm
+define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %is.0 = icmp ne i32 %tid, 0
+ br i1 %is.0, label %store, label %exit
+
+store:
+ %gep = getelementptr i32 addrspace(1)* %dst, i32 %tid
+ store i32 999, i32 addrspace(1)* %gep
+ ret void
+
+exit:
+ ret void
+}
+
+; SI-LABEL: @simple_test_v_loop
+; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
+; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+; SI: s_cbranch_execz BB2_2
+
+; SI: ; BB#1:
+; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
+
+; SI: BB2_3:
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
+; SI: v_add_i32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
+; SI: s_cbranch_execnz BB2_3
+
+define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+entry:
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %is.0 = icmp ne i32 %tid, 0
+ %limit = add i32 %tid, 64
+ br i1 %is.0, label %loop, label %exit
+
+loop:
+ %i = phi i32 [%tid, %entry], [%i.inc, %loop]
+ %gep.src = getelementptr i32 addrspace(1)* %src, i32 %i
+ %gep.dst = getelementptr i32 addrspace(1)* %dst, i32 %i
+ %load = load i32 addrspace(1)* %src
+ store i32 %load, i32 addrspace(1)* %gep.dst
+ %i.inc = add nsw i32 %i, 1
+ %cmp = icmp eq i32 %limit, %i.inc
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; SI-LABEL: @multi_vcond_loop
+
+; Load loop limit from buffer
+; Branch to exit if uniformly not taken
+; SI: ; BB#0:
+; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
+; SI: v_cmp_gt_i32_e64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG]], [[OUTER_CMP_SREG]]
+; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
+; SI: s_cbranch_execz BB3_2
+
+; Initialize inner condition to false
+; SI: ; BB#1:
+; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
+
+; Clear exec bits for workitems that load -1s
+; SI: BB3_3:
+; SI: buffer_load_dword [[A:v[0-9]+]]
+; SI: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1
+; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
+; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
+; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
+; SI: s_cbranch_execz BB3_5
+
+; SI: BB#4:
+; SI: buffer_store_dword
+; SI: v_cmp_ge_i64_e32 vcc
+; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
+
+; SI: BB3_5:
+; SI: s_or_b64 exec, exec, [[ORNEG1]]
+; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
+; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
+; SI: s_cbranch_execnz BB3_3
+
+; SI: BB#6
+; SI: s_or_b64 exec, exec, [[COND_STATE]]
+
+; SI: BB3_2:
+; SI-NOT: [[COND_STATE]]
+; SI: s_endpgm
+
+define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
+bb:
+ %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tmp4 = sext i32 %tmp to i64
+ %tmp5 = getelementptr inbounds i32 addrspace(1)* %arg3, i64 %tmp4
+ %tmp6 = load i32 addrspace(1)* %tmp5, align 4
+ %tmp7 = icmp sgt i32 %tmp6, 0
+ %tmp8 = sext i32 %tmp6 to i64
+ br i1 %tmp7, label %bb10, label %bb26
+
+bb10: ; preds = %bb, %bb20
+ %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
+ %tmp12 = add nsw i64 %tmp11, %tmp4
+ %tmp13 = getelementptr inbounds i32 addrspace(1)* %arg1, i64 %tmp12
+ %tmp14 = load i32 addrspace(1)* %tmp13, align 4
+ %tmp15 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp12
+ %tmp16 = load i32 addrspace(1)* %tmp15, align 4
+ %tmp17 = icmp ne i32 %tmp14, -1
+ %tmp18 = icmp ne i32 %tmp16, -1
+ %tmp19 = and i1 %tmp17, %tmp18
+ br i1 %tmp19, label %bb20, label %bb26
+
+bb20: ; preds = %bb10
+ %tmp21 = add nsw i32 %tmp16, %tmp14
+ %tmp22 = getelementptr inbounds i32 addrspace(1)* %arg, i64 %tmp12
+ store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
+ %tmp23 = add nuw nsw i64 %tmp11, 1
+ %tmp24 = icmp slt i64 %tmp23, %tmp8
+ br i1 %tmp24, label %bb10, label %bb26
+
+bb26: ; preds = %bb10, %bb20, %bb
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll
index ec1995f68089..8f73f657ccd8 100644
--- a/test/CodeGen/R600/vector-alloca.ll
+++ b/test/CodeGen/R600/vector-alloca.ll
@@ -1,8 +1,8 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; FUNC-LABEL: @vector_read
+; FUNC-LABEL: {{^}}vector_read:
; EG: MOV
; EG: MOV
; EG: MOV
@@ -25,7 +25,7 @@ entry:
ret void
}
-; FUNC-LABEL: @vector_write
+; FUNC-LABEL: {{^}}vector_write:
; EG: MOV
; EG: MOV
; EG: MOV
@@ -53,7 +53,7 @@ entry:
; This test should be optimize to:
; store i32 0, i32 addrspace(1)* %out
-; FUNC-LABEL: @bitcast_gep
+; FUNC-LABEL: {{^}}bitcast_gep:
; EG: STORE_RAW
define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll
index 7ea7a5c079cf..e24744e5ea4a 100644
--- a/test/CodeGen/R600/vertex-fetch-encoding.ll
+++ b/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
-; NI-CHECK: @vtx_fetch32
+; NI-CHECK: {{^}}vtx_fetch32:
; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
-; CM-CHECK: @vtx_fetch32
+; CM-CHECK: {{^}}vtx_fetch32:
; CM-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -13,7 +13,7 @@ entry:
ret void
}
-; NI-CHECK: @vtx_fetch128
+; NI-CHECK: {{^}}vtx_fetch128:
; NI-CHECK: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
; XXX: Add a case for Cayman when v4i32 stores are supported.
diff --git a/test/CodeGen/R600/vop-shrink.ll b/test/CodeGen/R600/vop-shrink.ll
index 54e588d80842..11d91293c5b4 100644
--- a/test/CodeGen/R600/vop-shrink.ll
+++ b/test/CodeGen/R600/vop-shrink.ll
@@ -1,14 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; XXX: This testis for a bug in the SIShrinkInstruction pass and it will be
-; relevant once we are selecting 64-bit instructions. We are
-; currently selecting mostly 32-bit instruction, so the
-; SIShrinkInstructions pass isn't doing much.
-; XFAIL: *
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; Test that we correctly commute a sub instruction
-; FUNC-LABEL: @sub_rev
-; SI-NOT: V_SUB_I32_e32 v{{[0-9]+}}, s
-; SI: V_SUBREV_I32_e32 v{{[0-9]+}}, s
+; FUNC-LABEL: {{^}}sub_rev:
+; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
+; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
; ModuleID = 'vop-shrink.ll'
@@ -34,6 +29,20 @@ endif: ; preds = %else, %if
ret void
}
+; Test that we fold an immediate that was illegal for a 64-bit op into the
+; 32-bit op when we shrink it.
+
+; FUNC-LABEL: {{^}}add_fold:
+; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000
+define void @add_fold(float addrspace(1)* %out) {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x()
+ %tmp1 = uitofp i32 %tmp to float
+ %tmp2 = fadd float %tmp1, 1.024000e+03
+ store float %tmp2, float addrspace(1)* %out
+ ret void
+}
+
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.x() #0
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index dca7b067b26e..1fa8d4a49e10 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-;EG-CHECK: @test_select_v2i32
+;EG-CHECK: {{^}}test_select_v2i32:
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test_select_v2i32
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: {{^}}test_select_v2i32:
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
entry:
@@ -19,13 +19,13 @@ entry:
ret void
}
-;EG-CHECK: @test_select_v2f32
+;EG-CHECK: {{^}}test_select_v2f32:
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test_select_v2f32
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: {{^}}test_select_v2f32:
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
entry:
@@ -37,17 +37,17 @@ entry:
ret void
}
-;EG-CHECK: @test_select_v4i32
+;EG-CHECK: {{^}}test_select_v4i32:
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test_select_v4i32
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: {{^}}test_select_v4i32:
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
entry:
@@ -59,7 +59,7 @@ entry:
ret void
}
-;EG-CHECK: @test_select_v4f32
+;EG-CHECK: {{^}}test_select_v4f32:
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
diff --git a/test/CodeGen/R600/vselect64.ll b/test/CodeGen/R600/vselect64.ll
index 604695b4fa6b..ef85ebe7899f 100644
--- a/test/CodeGen/R600/vselect64.ll
+++ b/test/CodeGen/R600/vselect64.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; XXX: Merge this test into vselect.ll once SI supports 64-bit select.
-; CHECK-LABEL: @test_select_v4i64
+; CHECK-LABEL: {{^}}test_select_v4i64:
; Make sure the vectors aren't being stored on the stack. We know they are
; being stored on the stack if the shaders uses at leat 10 registers.
; CHECK-NOT: {{\**}} MOV T{{[0-9][0-9]}}.X
diff --git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/R600/vtx-fetch-branch.ll
index 0fc99dee0dbe..bcbe34ea543b 100644
--- a/test/CodeGen/R600/vtx-fetch-branch.ll
+++ b/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -6,7 +6,7 @@
; after the fetch clause.
-; CHECK-LABEL: @test
+; CHECK-LABEL: {{^}}test:
; CHECK-NOT: ALU_POP_AFTER
; CHECK: TEX
; CHECK-NEXT: POP
diff --git a/test/CodeGen/R600/vtx-schedule.ll b/test/CodeGen/R600/vtx-schedule.ll
index ce852c5efeff..8254c9923477 100644
--- a/test/CodeGen/R600/vtx-schedule.ll
+++ b/test/CodeGen/R600/vtx-schedule.ll
@@ -4,7 +4,7 @@
; the result of another VTX_READ instruction were being grouped in the
; same fetch clasue.
-; CHECK: @test
+; CHECK: {{^}}test:
; CHECK: Fetch clause
; CHECK: VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0
; CHECK: Fetch clause
diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
index 2cf88fe9f735..b30f86ccd0d6 100644
--- a/test/CodeGen/R600/wait.ll
+++ b/test/CodeGen/R600/wait.ll
@@ -1,37 +1,45 @@
-; RUN: llc < %s -march=r600 -mcpu=SI --verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
-;CHECK-LABEL: @main
-;CHECK: S_WAITCNT lgkmcnt(0)
-;CHECK: S_WAITCNT vmcnt(0)
-;CHECK: S_WAITCNT expcnt(0) lgkmcnt(0)
-
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
+; CHECK-LABEL: {{^}}main:
+; CHECK: s_load_dwordx4
+; CHECK: s_load_dwordx4
+; CHECK: s_waitcnt lgkmcnt(0){{$}}
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK: s_waitcnt expcnt(0) lgkmcnt(0){{$}}
+define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
main_body:
- %10 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
- %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
- %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6)
- %13 = extractelement <4 x float> %12, i32 0
- %14 = extractelement <4 x float> %12, i32 1
- %15 = extractelement <4 x float> %12, i32 2
- %16 = extractelement <4 x float> %12, i32 3
- %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
- %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0
- %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6)
- %20 = extractelement <4 x float> %19, i32 0
- %21 = extractelement <4 x float> %19, i32 1
- %22 = extractelement <4 x float> %19, i32 2
- %23 = extractelement <4 x float> %19, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23)
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16)
+ %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0
+ %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
+ %tmp12 = extractelement <4 x float> %tmp11, i32 0
+ %tmp13 = extractelement <4 x float> %tmp11, i32 1
+ call void @llvm.AMDGPU.barrier.global() #1
+ %tmp14 = extractelement <4 x float> %tmp11, i32 2
+; %tmp15 = extractelement <4 x float> %tmp11, i32 3
+ %tmp15 = load float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
+ %tmp16 = getelementptr <16 x i8> addrspace(2)* %arg3, i32 1
+ %tmp17 = load <16 x i8> addrspace(2)* %tmp16, !tbaa !0
+ %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
+ %tmp19 = extractelement <4 x float> %tmp18, i32 0
+ %tmp20 = extractelement <4 x float> %tmp18, i32 1
+ %tmp21 = extractelement <4 x float> %tmp18, i32 2
+ %tmp22 = extractelement <4 x float> %tmp18, i32 3
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp12, float %tmp13, float %tmp14, float %tmp15)
ret void
}
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.global() #1
+
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" }
-attributes #1 = { nounwind readnone }
+attributes #1 = { noduplicate nounwind }
+attributes #2 = { nounwind readnone }
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!1, !1, i64 0, i32 1}
+!1 = !{!"const", null}
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
index 01236590742a..37c0e0f304ce 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -1,13 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-
-; R600-CHECK: @ngroups_x
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].X
-; SI-CHECK: @ngroups_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}ngroups_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].X
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
@@ -15,13 +16,13 @@ entry:
ret void
}
-; R600-CHECK: @ngroups_y
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].Y
-; SI-CHECK: @ngroups_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x1
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}ngroups_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].Y
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
@@ -29,13 +30,13 @@ entry:
ret void
}
-; R600-CHECK: @ngroups_z
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].Z
-; SI-CHECK: @ngroups_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x2
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}ngroups_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].Z
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
@@ -43,13 +44,13 @@ entry:
ret void
}
-; R600-CHECK: @global_size_x
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].W
-; SI-CHECK: @global_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x3
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}global_size_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].W
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
@@ -57,13 +58,13 @@ entry:
ret void
}
-; R600-CHECK: @global_size_y
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].X
-; SI-CHECK: @global_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x4
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}global_size_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].X
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
@@ -71,13 +72,13 @@ entry:
ret void
}
-; R600-CHECK: @global_size_z
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].Y
-; SI-CHECK: @global_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x5
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}global_size_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].Y
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
@@ -85,13 +86,13 @@ entry:
ret void
}
-; R600-CHECK: @local_size_x
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].Z
-; SI-CHECK: @local_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x6
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}local_size_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].Z
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @local_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.x() #0
@@ -99,13 +100,13 @@ entry:
ret void
}
-; R600-CHECK: @local_size_y
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].W
-; SI-CHECK: @local_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x7
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}local_size_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].W
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @local_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.y() #0
@@ -113,13 +114,13 @@ entry:
ret void
}
-; R600-CHECK: @local_size_z
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[2].X
-; SI-CHECK: @local_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x8
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}local_size_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[2].X
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
define void @local_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.z() #0
@@ -127,13 +128,27 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}get_work_dim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[2].Z
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
+define void @get_work_dim (i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
; The tgid values are stored in sgprs offset by the number of user sgprs.
; Currently we always use exactly 2 user sgprs for the pointer to the
; kernel arguments, but this may change in the future.
-; SI-CHECK: @tgid_x
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}tgid_x:
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
+; SI: buffer_store_dword [[VVAL]]
define void @tgid_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
@@ -141,9 +156,9 @@ entry:
ret void
}
-; SI-CHECK: @tgid_y
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s5
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}tgid_y:
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
+; SI: buffer_store_dword [[VVAL]]
define void @tgid_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
@@ -151,9 +166,9 @@ entry:
ret void
}
-; SI-CHECK: @tgid_z
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s6
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}tgid_z:
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
+; SI: buffer_store_dword [[VVAL]]
define void @tgid_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
@@ -161,8 +176,8 @@ entry:
ret void
}
-; SI-CHECK: @tidig_x
-; SI-CHECK: BUFFER_STORE_DWORD v0
+; FUNC-LABEL: {{^}}tidig_x:
+; SI: buffer_store_dword v0
define void @tidig_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
@@ -170,8 +185,8 @@ entry:
ret void
}
-; SI-CHECK: @tidig_y
-; SI-CHECK: BUFFER_STORE_DWORD v1
+; FUNC-LABEL: {{^}}tidig_y:
+; SI: buffer_store_dword v1
define void @tidig_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
@@ -179,8 +194,8 @@ entry:
ret void
}
-; SI-CHECK: @tidig_z
-; SI-CHECK: BUFFER_STORE_DWORD v2
+; FUNC-LABEL: {{^}}tidig_z:
+; SI: buffer_store_dword v2
define void @tidig_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
@@ -208,4 +223,6 @@ declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0
+declare i32 @llvm.AMDGPU.read.workdim() #0
+
attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
index b1cbe3ffbc41..4e77c07c0ea1 100644
--- a/test/CodeGen/R600/wrong-transalu-pos-fix.ll
+++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; We want all MULLO_INT inst to be last in their instruction group
-;CHECK: @fill3d
+;CHECK: {{^}}fill3d:
;CHECK-NOT: MULLO_INT T[0-9]+
; ModuleID = 'radeon'
@@ -81,6 +81,6 @@ attributes #1 = { nounwind readnone }
!opencl.kernels = !{!0, !1, !2}
-!0 = metadata !{null}
-!1 = metadata !{null}
-!2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
+!0 = !{null}
+!1 = !{null}
+!2 = !{void (i32 addrspace(1)*)* @fill3d}
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index e14bd7127231..af6196dabe90 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -1,14 +1,13 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-;EG-CHECK: @xor_v2i32
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @xor_v2i32
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; FUNC-LABEL: {{^}}xor_v2i32:
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
%a = load <2 x i32> addrspace(1) * %in0
@@ -18,17 +17,16 @@ define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
ret void
}
-;EG-CHECK: @xor_v4i32
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: {{^}}xor_v4i32:
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @xor_v4i32
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
%a = load <4 x i32> addrspace(1) * %in0
@@ -38,25 +36,42 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
ret void
}
-;EG-CHECK: @xor_i1
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-
-;SI-CHECK: @xor_i1
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; FUNC-LABEL: {{^}}xor_i1:
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
+; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0
+; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
+; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float addrspace(1) * %in0
%b = load float addrspace(1) * %in1
%acmp = fcmp oge float %a, 0.000000e+00
- %bcmp = fcmp oge float %b, 0.000000e+00
+ %bcmp = fcmp oge float %b, 1.000000e+00
%xor = xor i1 %acmp, %bcmp
%result = select i1 %xor, float %a, float %b
store float %result, float addrspace(1)* %out
ret void
}
-; SI-CHECK-LABEL: @vector_xor_i32
-; SI-CHECK: V_XOR_B32_e32
+; FUNC-LABEL: {{^}}v_xor_i1:
+; SI: buffer_load_ubyte [[A:v[0-9]+]]
+; SI: buffer_load_ubyte [[B:v[0-9]+]]
+; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[A]], [[B]]
+; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
+; SI: buffer_store_byte [[RESULT]]
+define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
+ %a = load i1 addrspace(1)* %in0
+ %b = load i1 addrspace(1)* %in1
+ %xor = xor i1 %a, %b
+ store i1 %xor, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}vector_xor_i32:
+; SI: v_xor_b32_e32
define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
%a = load i32 addrspace(1)* %in0
%b = load i32 addrspace(1)* %in1
@@ -65,24 +80,24 @@ define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
ret void
}
-; SI-CHECK-LABEL: @scalar_xor_i32
-; SI-CHECK: S_XOR_B32
+; FUNC-LABEL: {{^}}scalar_xor_i32:
+; SI: s_xor_b32
define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%result = xor i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
}
-; SI-CHECK-LABEL: @scalar_not_i32
-; SI-CHECK: S_NOT_B32
+; FUNC-LABEL: {{^}}scalar_not_i32:
+; SI: s_not_b32
define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) {
%result = xor i32 %a, -1
store i32 %result, i32 addrspace(1)* %out
ret void
}
-; SI-CHECK-LABEL: @vector_not_i32
-; SI-CHECK: V_NOT_B32
+; FUNC-LABEL: {{^}}vector_not_i32:
+; SI: v_not_b32
define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
%a = load i32 addrspace(1)* %in0
%b = load i32 addrspace(1)* %in1
@@ -91,10 +106,10 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
ret void
}
-; SI-CHECK-LABEL: @vector_xor_i64
-; SI-CHECK: V_XOR_B32_e32
-; SI-CHECK: V_XOR_B32_e32
-; SI-CHECK: S_ENDPGM
+; FUNC-LABEL: {{^}}vector_xor_i64:
+; SI: v_xor_b32_e32
+; SI: v_xor_b32_e32
+; SI: s_endpgm
define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
%a = load i64 addrspace(1)* %in0
%b = load i64 addrspace(1)* %in1
@@ -103,26 +118,26 @@ define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64
ret void
}
-; SI-CHECK-LABEL: @scalar_xor_i64
-; SI-CHECK: S_XOR_B64
-; SI-CHECK: S_ENDPGM
+; FUNC-LABEL: {{^}}scalar_xor_i64:
+; SI: s_xor_b64
+; SI: s_endpgm
define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%result = xor i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
}
-; SI-CHECK-LABEL: @scalar_not_i64
-; SI-CHECK: S_NOT_B64
+; FUNC-LABEL: {{^}}scalar_not_i64:
+; SI: s_not_b64
define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
%result = xor i64 %a, -1
store i64 %result, i64 addrspace(1)* %out
ret void
}
-; SI-CHECK-LABEL: @vector_not_i64
-; SI-CHECK: V_NOT_B32
-; SI-CHECK: V_NOT_B32
+; FUNC-LABEL: {{^}}vector_not_i64:
+; SI: v_not_b32
+; SI: v_not_b32
define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
%a = load i64 addrspace(1)* %in0
%b = load i64 addrspace(1)* %in1
@@ -135,9 +150,8 @@ define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64
; Note that in the future the backend may be smart enough to
; use an SALU instruction for this.
-; SI-CHECK-LABEL: @xor_cf
-; SI-CHECK: V_XOR
-; SI-CHECK: V_XOR
+; FUNC-LABEL: {{^}}xor_cf:
+; SI: s_xor_b64
define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) {
entry:
%0 = icmp eq i64 %a, 0
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index 8585d4ab191e..7df4b48c2b9d 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; R600-CHECK: @test
+; R600-CHECK: {{^}}test:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; SI-CHECK: @test
-; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0
-; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
-; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[V_ZERO]]{{\]}}
+; SI-CHECK: {{^}}test:
+; SI-CHECK: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}}
+; SI-CHECK: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
+; SI-CHECK: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}
define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = mul i32 %a, %b
@@ -18,8 +18,8 @@ entry:
ret void
}
-; SI-CHECK-LABEL: @testi1toi32
-; SI-CHECK: V_CNDMASK_B32
+; SI-CHECK-LABEL: {{^}}testi1toi32:
+; SI-CHECK: v_cndmask_b32
define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = icmp eq i32 %a, %b
@@ -28,10 +28,10 @@ entry:
ret void
}
-; SI-CHECK-LABEL: @zext_i1_to_i64
-; SI-CHECK: V_CMP_EQ_I32
-; SI-CHECK: V_CNDMASK_B32
-; SI-CHECK: S_MOV_B32 s{{[0-9]+}}, 0
+; SI-CHECK-LABEL: {{^}}zext_i1_to_i64:
+; SI-CHECK: v_cmp_eq_i32
+; SI-CHECK: v_cndmask_b32
+; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0
define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%ext = zext i1 %cmp to i64