path: root/test/CodeGen/R600
author    Dimitry Andric <dim@FreeBSD.org>    2013-04-08 18:41:23 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2013-04-08 18:41:23 +0000
commit    4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch)
tree      06099edc18d30894081a822b756f117cbe0b8207 /test/CodeGen/R600
parent    482e7bddf617ae804dc47133cb07eb4aa81e45de (diff)
download  src-4a16efa3e43e35f0cc9efe3a67f620f0017c3d36.tar.gz
          src-4a16efa3e43e35f0cc9efe3a67f620f0017c3d36.zip
Vendor import of llvm trunk r178860 (vendor/llvm/llvm-trunk-r178860)
Notes:
svn path=/vendor/llvm/dist/; revision=249259
svn path=/vendor/llvm/llvm-trunk-r178860/; revision=249260; tag=vendor/llvm/llvm-trunk-r178860
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--  test/CodeGen/R600/128bit-kernel-args.ll  18
-rw-r--r--  test/CodeGen/R600/add.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/alu-split.ll  850
-rw-r--r--  test/CodeGen/R600/and.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll  36
-rw-r--r--  test/CodeGen/R600/disconnected-predset-break-bug.ll  29
-rw-r--r--  test/CodeGen/R600/fabs.ll  16
-rw-r--r--  test/CodeGen/R600/fadd.ll  16
-rw-r--r--  test/CodeGen/R600/fadd.v4f32.ll  15
-rw-r--r--  test/CodeGen/R600/fcmp-cnd.ll  14
-rw-r--r--  test/CodeGen/R600/fcmp-cnde-int-args.ll  16
-rw-r--r--  test/CodeGen/R600/fcmp.ll  37
-rw-r--r--  test/CodeGen/R600/fdiv.v4f32.ll  19
-rw-r--r--  test/CodeGen/R600/floor.ll  16
-rw-r--r--  test/CodeGen/R600/fmad.ll  19
-rw-r--r--  test/CodeGen/R600/fmax.ll  16
-rw-r--r--  test/CodeGen/R600/fmin.ll  16
-rw-r--r--  test/CodeGen/R600/fmul.ll  16
-rw-r--r--  test/CodeGen/R600/fmul.v4f32.ll  15
-rw-r--r--  test/CodeGen/R600/fsub.ll  16
-rw-r--r--  test/CodeGen/R600/fsub.v4f32.ll  15
-rw-r--r--  test/CodeGen/R600/i8_to_double_to_float.ll  11
-rw-r--r--  test/CodeGen/R600/icmp-select-sete-reverse-args.ll  18
-rw-r--r--  test/CodeGen/R600/jump_address.ll  50
-rw-r--r--  test/CodeGen/R600/kcache-fold.ll  100
-rw-r--r--  test/CodeGen/R600/legalizedag-bug-expand-setcc.ll  26
-rw-r--r--  test/CodeGen/R600/lit.local.cfg  13
-rw-r--r--  test/CodeGen/R600/literals.ll  32
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.mul.ll  17
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.tex.ll  42
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.trunc.ll  16
-rw-r--r--  test/CodeGen/R600/llvm.SI.fs.interp.constant.ll  21
-rw-r--r--  test/CodeGen/R600/llvm.SI.sample.ll  106
-rw-r--r--  test/CodeGen/R600/llvm.cos.ll  16
-rw-r--r--  test/CodeGen/R600/llvm.pow.ll  19
-rw-r--r--  test/CodeGen/R600/llvm.sin.ll  16
-rw-r--r--  test/CodeGen/R600/load.constant_addrspace.f32.ll  9
-rw-r--r--  test/CodeGen/R600/load.i8.ll  10
-rw-r--r--  test/CodeGen/R600/lshl.ll  14
-rw-r--r--  test/CodeGen/R600/lshr.ll  14
-rw-r--r--  test/CodeGen/R600/mulhu.ll  16
-rw-r--r--  test/CodeGen/R600/predicates.ll  104
-rw-r--r--  test/CodeGen/R600/reciprocal.ll  16
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop-nested-if.ll  83
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop-nested.ll  88
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop.ll  55
-rw-r--r--  test/CodeGen/R600/schedule-if-2.ll  94
-rw-r--r--  test/CodeGen/R600/schedule-if.ll  46
-rw-r--r--  test/CodeGen/R600/schedule-vs-if-nested-loop.ll  134
-rw-r--r--  test/CodeGen/R600/sdiv.ll  21
-rw-r--r--  test/CodeGen/R600/selectcc-icmp-select-float.ll  15
-rw-r--r--  test/CodeGen/R600/selectcc-opt.ll  64
-rw-r--r--  test/CodeGen/R600/selectcc_cnde.ll  11
-rw-r--r--  test/CodeGen/R600/selectcc_cnde_int.ll  11
-rw-r--r--  test/CodeGen/R600/set-dx10.ll  137
-rw-r--r--  test/CodeGen/R600/setcc.v4i32.ll  12
-rw-r--r--  test/CodeGen/R600/seto.ll  13
-rw-r--r--  test/CodeGen/R600/setuo.ll  13
-rw-r--r--  test/CodeGen/R600/short-args.ll  41
-rw-r--r--  test/CodeGen/R600/store.v4f32.ll  9
-rw-r--r--  test/CodeGen/R600/store.v4i32.ll  9
-rw-r--r--  test/CodeGen/R600/udiv.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/unsupported-cc.ll  83
-rw-r--r--  test/CodeGen/R600/urem.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/vec4-expand.ll  53
65 files changed, 2933 insertions, 0 deletions
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
new file mode 100644
index 000000000000..114f9e74474f
--- /dev/null
+++ b/test/CodeGen/R600/128bit-kernel-args.ll
@@ -0,0 +1,18 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @v4i32_kernel_arg
+; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+
+define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
+entry:
+ store <4 x i32> %in, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @v4f32_kernel_arg
+; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+ store <4 x float> %in, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.v4i32.ll
new file mode 100644
index 000000000000..ac4a87417bde
--- /dev/null
+++ b/test/CodeGen/R600/add.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = add <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll
new file mode 100644
index 000000000000..afefcd9f78b0
--- /dev/null
+++ b/test/CodeGen/R600/alu-split.ll
@@ -0,0 +1,850 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ALU
+;CHECK: ALU
+;CHECK: ALU
+;CHECK-NOT: ALU
+
+define void @main() #0 {
+main_body:
+ %0 = call float @llvm.R600.load.input(i32 4)
+ %1 = call float @llvm.R600.load.input(i32 5)
+ %2 = call float @llvm.R600.load.input(i32 6)
+ %3 = call float @llvm.R600.load.input(i32 7)
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = fcmp une float 0x4016F2B020000000, %5
+ %7 = select i1 %6, float 1.000000e+00, float 0.000000e+00
+ %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %9 = extractelement <4 x float> %8, i32 1
+ %10 = fcmp une float 0x401FDCC640000000, %9
+ %11 = select i1 %10, float 1.000000e+00, float 0.000000e+00
+ %12 = fsub float -0.000000e+00, %7
+ %13 = fptosi float %12 to i32
+ %14 = fsub float -0.000000e+00, %11
+ %15 = fptosi float %14 to i32
+ %16 = bitcast i32 %13 to float
+ %17 = bitcast i32 %15 to float
+ %18 = bitcast float %16 to i32
+ %19 = bitcast float %17 to i32
+ %20 = or i32 %18, %19
+ %21 = bitcast i32 %20 to float
+ %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
+ %23 = extractelement <4 x float> %22, i32 0
+ %24 = fcmp une float 0xC00574BC60000000, %23
+ %25 = select i1 %24, float 1.000000e+00, float 0.000000e+00
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
+ %27 = extractelement <4 x float> %26, i32 1
+ %28 = fcmp une float 0x40210068E0000000, %27
+ %29 = select i1 %28, float 1.000000e+00, float 0.000000e+00
+ %30 = fsub float -0.000000e+00, %25
+ %31 = fptosi float %30 to i32
+ %32 = fsub float -0.000000e+00, %29
+ %33 = fptosi float %32 to i32
+ %34 = bitcast i32 %31 to float
+ %35 = bitcast i32 %33 to float
+ %36 = bitcast float %34 to i32
+ %37 = bitcast float %35 to i32
+ %38 = or i32 %36, %37
+ %39 = bitcast i32 %38 to float
+ %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
+ %41 = extractelement <4 x float> %40, i32 0
+ %42 = fcmp une float 0xBFC9A6B500000000, %41
+ %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00
+ %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
+ %45 = extractelement <4 x float> %44, i32 1
+ %46 = fcmp une float 0xC0119BDA60000000, %45
+ %47 = select i1 %46, float 1.000000e+00, float 0.000000e+00
+ %48 = fsub float -0.000000e+00, %43
+ %49 = fptosi float %48 to i32
+ %50 = fsub float -0.000000e+00, %47
+ %51 = fptosi float %50 to i32
+ %52 = bitcast i32 %49 to float
+ %53 = bitcast i32 %51 to float
+ %54 = bitcast float %52 to i32
+ %55 = bitcast float %53 to i32
+ %56 = or i32 %54, %55
+ %57 = bitcast i32 %56 to float
+ %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
+ %59 = extractelement <4 x float> %58, i32 0
+ %60 = fcmp une float 0xC02085D640000000, %59
+ %61 = select i1 %60, float 1.000000e+00, float 0.000000e+00
+ %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
+ %63 = extractelement <4 x float> %62, i32 1
+ %64 = fcmp une float 0xBFD7C1BDA0000000, %63
+ %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00
+ %66 = fsub float -0.000000e+00, %61
+ %67 = fptosi float %66 to i32
+ %68 = fsub float -0.000000e+00, %65
+ %69 = fptosi float %68 to i32
+ %70 = bitcast i32 %67 to float
+ %71 = bitcast i32 %69 to float
+ %72 = bitcast float %70 to i32
+ %73 = bitcast float %71 to i32
+ %74 = or i32 %72, %73
+ %75 = bitcast i32 %74 to float
+ %76 = insertelement <4 x float> undef, float %21, i32 0
+ %77 = insertelement <4 x float> %76, float %39, i32 1
+ %78 = insertelement <4 x float> %77, float %57, i32 2
+ %79 = insertelement <4 x float> %78, float %75, i32 3
+ %80 = insertelement <4 x float> undef, float %21, i32 0
+ %81 = insertelement <4 x float> %80, float %39, i32 1
+ %82 = insertelement <4 x float> %81, float %57, i32 2
+ %83 = insertelement <4 x float> %82, float %75, i32 3
+ %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83)
+ %85 = bitcast float %84 to i32
+ %86 = icmp ne i32 %85, 0
+ %87 = sext i1 %86 to i32
+ %88 = bitcast i32 %87 to float
+ %89 = bitcast float %88 to i32
+ %90 = xor i32 %89, -1
+ %91 = bitcast i32 %90 to float
+ %92 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
+ %93 = extractelement <4 x float> %92, i32 0
+ %94 = fcmp une float 0x401FDCC640000000, %93
+ %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
+ %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
+ %97 = extractelement <4 x float> %96, i32 1
+ %98 = fcmp une float 0xC00574BC60000000, %97
+ %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
+ %100 = fsub float -0.000000e+00, %95
+ %101 = fptosi float %100 to i32
+ %102 = fsub float -0.000000e+00, %99
+ %103 = fptosi float %102 to i32
+ %104 = bitcast i32 %101 to float
+ %105 = bitcast i32 %103 to float
+ %106 = bitcast float %104 to i32
+ %107 = bitcast float %105 to i32
+ %108 = or i32 %106, %107
+ %109 = bitcast i32 %108 to float
+ %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
+ %111 = extractelement <4 x float> %110, i32 0
+ %112 = fcmp une float 0x40210068E0000000, %111
+ %113 = select i1 %112, float 1.000000e+00, float 0.000000e+00
+ %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
+ %115 = extractelement <4 x float> %114, i32 1
+ %116 = fcmp une float 0xBFC9A6B500000000, %115
+ %117 = select i1 %116, float 1.000000e+00, float 0.000000e+00
+ %118 = fsub float -0.000000e+00, %113
+ %119 = fptosi float %118 to i32
+ %120 = fsub float -0.000000e+00, %117
+ %121 = fptosi float %120 to i32
+ %122 = bitcast i32 %119 to float
+ %123 = bitcast i32 %121 to float
+ %124 = bitcast float %122 to i32
+ %125 = bitcast float %123 to i32
+ %126 = or i32 %124, %125
+ %127 = bitcast i32 %126 to float
+ %128 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
+ %129 = extractelement <4 x float> %128, i32 0
+ %130 = fcmp une float 0xC0119BDA60000000, %129
+ %131 = select i1 %130, float 1.000000e+00, float 0.000000e+00
+ %132 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
+ %133 = extractelement <4 x float> %132, i32 1
+ %134 = fcmp une float 0xC02085D640000000, %133
+ %135 = select i1 %134, float 1.000000e+00, float 0.000000e+00
+ %136 = fsub float -0.000000e+00, %131
+ %137 = fptosi float %136 to i32
+ %138 = fsub float -0.000000e+00, %135
+ %139 = fptosi float %138 to i32
+ %140 = bitcast i32 %137 to float
+ %141 = bitcast i32 %139 to float
+ %142 = bitcast float %140 to i32
+ %143 = bitcast float %141 to i32
+ %144 = or i32 %142, %143
+ %145 = bitcast i32 %144 to float
+ %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+ %147 = extractelement <4 x float> %146, i32 0
+ %148 = fcmp une float 0xBFD7C1BDA0000000, %147
+ %149 = select i1 %148, float 1.000000e+00, float 0.000000e+00
+ %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+ %151 = extractelement <4 x float> %150, i32 1
+ %152 = fcmp une float 0x401E1D7DC0000000, %151
+ %153 = select i1 %152, float 1.000000e+00, float 0.000000e+00
+ %154 = fsub float -0.000000e+00, %149
+ %155 = fptosi float %154 to i32
+ %156 = fsub float -0.000000e+00, %153
+ %157 = fptosi float %156 to i32
+ %158 = bitcast i32 %155 to float
+ %159 = bitcast i32 %157 to float
+ %160 = bitcast float %158 to i32
+ %161 = bitcast float %159 to i32
+ %162 = or i32 %160, %161
+ %163 = bitcast i32 %162 to float
+ %164 = insertelement <4 x float> undef, float %109, i32 0
+ %165 = insertelement <4 x float> %164, float %127, i32 1
+ %166 = insertelement <4 x float> %165, float %145, i32 2
+ %167 = insertelement <4 x float> %166, float %163, i32 3
+ %168 = insertelement <4 x float> undef, float %109, i32 0
+ %169 = insertelement <4 x float> %168, float %127, i32 1
+ %170 = insertelement <4 x float> %169, float %145, i32 2
+ %171 = insertelement <4 x float> %170, float %163, i32 3
+ %172 = call float @llvm.AMDGPU.dp4(<4 x float> %167, <4 x float> %171)
+ %173 = bitcast float %172 to i32
+ %174 = icmp ne i32 %173, 0
+ %175 = sext i1 %174 to i32
+ %176 = bitcast i32 %175 to float
+ %177 = bitcast float %176 to i32
+ %178 = xor i32 %177, -1
+ %179 = bitcast i32 %178 to float
+ %180 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %181 = extractelement <4 x float> %180, i32 0
+ %182 = fcmp une float 0x401FDCC640000000, %181
+ %183 = select i1 %182, float 1.000000e+00, float 0.000000e+00
+ %184 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %185 = extractelement <4 x float> %184, i32 1
+ %186 = fcmp une float 0xC00574BC60000000, %185
+ %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00
+ %188 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %189 = extractelement <4 x float> %188, i32 2
+ %190 = fcmp une float 0x40210068E0000000, %189
+ %191 = select i1 %190, float 1.000000e+00, float 0.000000e+00
+ %192 = fsub float -0.000000e+00, %183
+ %193 = fptosi float %192 to i32
+ %194 = fsub float -0.000000e+00, %187
+ %195 = fptosi float %194 to i32
+ %196 = fsub float -0.000000e+00, %191
+ %197 = fptosi float %196 to i32
+ %198 = bitcast i32 %193 to float
+ %199 = bitcast i32 %195 to float
+ %200 = bitcast i32 %197 to float
+ %201 = bitcast float %199 to i32
+ %202 = bitcast float %200 to i32
+ %203 = or i32 %201, %202
+ %204 = bitcast i32 %203 to float
+ %205 = bitcast float %198 to i32
+ %206 = bitcast float %204 to i32
+ %207 = or i32 %205, %206
+ %208 = bitcast i32 %207 to float
+ %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %210 = extractelement <4 x float> %209, i32 0
+ %211 = fcmp une float 0xBFC9A6B500000000, %210
+ %212 = select i1 %211, float 1.000000e+00, float 0.000000e+00
+ %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %214 = extractelement <4 x float> %213, i32 1
+ %215 = fcmp une float 0xC0119BDA60000000, %214
+ %216 = select i1 %215, float 1.000000e+00, float 0.000000e+00
+ %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %218 = extractelement <4 x float> %217, i32 2
+ %219 = fcmp une float 0xC02085D640000000, %218
+ %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00
+ %221 = fsub float -0.000000e+00, %212
+ %222 = fptosi float %221 to i32
+ %223 = fsub float -0.000000e+00, %216
+ %224 = fptosi float %223 to i32
+ %225 = fsub float -0.000000e+00, %220
+ %226 = fptosi float %225 to i32
+ %227 = bitcast i32 %222 to float
+ %228 = bitcast i32 %224 to float
+ %229 = bitcast i32 %226 to float
+ %230 = bitcast float %228 to i32
+ %231 = bitcast float %229 to i32
+ %232 = or i32 %230, %231
+ %233 = bitcast i32 %232 to float
+ %234 = bitcast float %227 to i32
+ %235 = bitcast float %233 to i32
+ %236 = or i32 %234, %235
+ %237 = bitcast i32 %236 to float
+ %238 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %239 = extractelement <4 x float> %238, i32 0
+ %240 = fcmp une float 0xBFD7C1BDA0000000, %239
+ %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00
+ %242 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %243 = extractelement <4 x float> %242, i32 1
+ %244 = fcmp une float 0x401E1D7DC0000000, %243
+ %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00
+ %246 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %247 = extractelement <4 x float> %246, i32 2
+ %248 = fcmp une float 0xC019893740000000, %247
+ %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00
+ %250 = fsub float -0.000000e+00, %241
+ %251 = fptosi float %250 to i32
+ %252 = fsub float -0.000000e+00, %245
+ %253 = fptosi float %252 to i32
+ %254 = fsub float -0.000000e+00, %249
+ %255 = fptosi float %254 to i32
+ %256 = bitcast i32 %251 to float
+ %257 = bitcast i32 %253 to float
+ %258 = bitcast i32 %255 to float
+ %259 = bitcast float %257 to i32
+ %260 = bitcast float %258 to i32
+ %261 = or i32 %259, %260
+ %262 = bitcast i32 %261 to float
+ %263 = bitcast float %256 to i32
+ %264 = bitcast float %262 to i32
+ %265 = or i32 %263, %264
+ %266 = bitcast i32 %265 to float
+ %267 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %268 = extractelement <4 x float> %267, i32 0
+ %269 = fcmp une float 0x40220F0D80000000, %268
+ %270 = select i1 %269, float 1.000000e+00, float 0.000000e+00
+ %271 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %272 = extractelement <4 x float> %271, i32 1
+ %273 = fcmp une float 0xC018E2EB20000000, %272
+ %274 = select i1 %273, float 1.000000e+00, float 0.000000e+00
+ %275 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %276 = extractelement <4 x float> %275, i32 2
+ %277 = fcmp une float 0xBFEA8DB8C0000000, %276
+ %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00
+ %279 = fsub float -0.000000e+00, %270
+ %280 = fptosi float %279 to i32
+ %281 = fsub float -0.000000e+00, %274
+ %282 = fptosi float %281 to i32
+ %283 = fsub float -0.000000e+00, %278
+ %284 = fptosi float %283 to i32
+ %285 = bitcast i32 %280 to float
+ %286 = bitcast i32 %282 to float
+ %287 = bitcast i32 %284 to float
+ %288 = bitcast float %286 to i32
+ %289 = bitcast float %287 to i32
+ %290 = or i32 %288, %289
+ %291 = bitcast i32 %290 to float
+ %292 = bitcast float %285 to i32
+ %293 = bitcast float %291 to i32
+ %294 = or i32 %292, %293
+ %295 = bitcast i32 %294 to float
+ %296 = insertelement <4 x float> undef, float %208, i32 0
+ %297 = insertelement <4 x float> %296, float %237, i32 1
+ %298 = insertelement <4 x float> %297, float %266, i32 2
+ %299 = insertelement <4 x float> %298, float %295, i32 3
+ %300 = insertelement <4 x float> undef, float %208, i32 0
+ %301 = insertelement <4 x float> %300, float %237, i32 1
+ %302 = insertelement <4 x float> %301, float %266, i32 2
+ %303 = insertelement <4 x float> %302, float %295, i32 3
+ %304 = call float @llvm.AMDGPU.dp4(<4 x float> %299, <4 x float> %303)
+ %305 = bitcast float %304 to i32
+ %306 = icmp ne i32 %305, 0
+ %307 = sext i1 %306 to i32
+ %308 = bitcast i32 %307 to float
+ %309 = bitcast float %308 to i32
+ %310 = xor i32 %309, -1
+ %311 = bitcast i32 %310 to float
+ %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %313 = extractelement <4 x float> %312, i32 0
+ %314 = fcmp une float 0xC00574BC60000000, %313
+ %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00
+ %316 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %317 = extractelement <4 x float> %316, i32 1
+ %318 = fcmp une float 0x40210068E0000000, %317
+ %319 = select i1 %318, float 1.000000e+00, float 0.000000e+00
+ %320 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %321 = extractelement <4 x float> %320, i32 2
+ %322 = fcmp une float 0xBFC9A6B500000000, %321
+ %323 = select i1 %322, float 1.000000e+00, float 0.000000e+00
+ %324 = fsub float -0.000000e+00, %315
+ %325 = fptosi float %324 to i32
+ %326 = fsub float -0.000000e+00, %319
+ %327 = fptosi float %326 to i32
+ %328 = fsub float -0.000000e+00, %323
+ %329 = fptosi float %328 to i32
+ %330 = bitcast i32 %325 to float
+ %331 = bitcast i32 %327 to float
+ %332 = bitcast i32 %329 to float
+ %333 = bitcast float %331 to i32
+ %334 = bitcast float %332 to i32
+ %335 = or i32 %333, %334
+ %336 = bitcast i32 %335 to float
+ %337 = bitcast float %330 to i32
+ %338 = bitcast float %336 to i32
+ %339 = or i32 %337, %338
+ %340 = bitcast i32 %339 to float
+ %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %342 = extractelement <4 x float> %341, i32 0
+ %343 = fcmp une float 0xC0119BDA60000000, %342
+ %344 = select i1 %343, float 1.000000e+00, float 0.000000e+00
+ %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %346 = extractelement <4 x float> %345, i32 1
+ %347 = fcmp une float 0xC02085D640000000, %346
+ %348 = select i1 %347, float 1.000000e+00, float 0.000000e+00
+ %349 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %350 = extractelement <4 x float> %349, i32 2
+ %351 = fcmp une float 0xBFD7C1BDA0000000, %350
+ %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00
+ %353 = fsub float -0.000000e+00, %344
+ %354 = fptosi float %353 to i32
+ %355 = fsub float -0.000000e+00, %348
+ %356 = fptosi float %355 to i32
+ %357 = fsub float -0.000000e+00, %352
+ %358 = fptosi float %357 to i32
+ %359 = bitcast i32 %354 to float
+ %360 = bitcast i32 %356 to float
+ %361 = bitcast i32 %358 to float
+ %362 = bitcast float %360 to i32
+ %363 = bitcast float %361 to i32
+ %364 = or i32 %362, %363
+ %365 = bitcast i32 %364 to float
+ %366 = bitcast float %359 to i32
+ %367 = bitcast float %365 to i32
+ %368 = or i32 %366, %367
+ %369 = bitcast i32 %368 to float
+ %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %371 = extractelement <4 x float> %370, i32 0
+ %372 = fcmp une float 0x401E1D7DC0000000, %371
+ %373 = select i1 %372, float 1.000000e+00, float 0.000000e+00
+ %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %375 = extractelement <4 x float> %374, i32 1
+ %376 = fcmp une float 0xC019893740000000, %375
+ %377 = select i1 %376, float 1.000000e+00, float 0.000000e+00
+ %378 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %379 = extractelement <4 x float> %378, i32 2
+ %380 = fcmp une float 0x40220F0D80000000, %379
+ %381 = select i1 %380, float 1.000000e+00, float 0.000000e+00
+ %382 = fsub float -0.000000e+00, %373
+ %383 = fptosi float %382 to i32
+ %384 = fsub float -0.000000e+00, %377
+ %385 = fptosi float %384 to i32
+ %386 = fsub float -0.000000e+00, %381
+ %387 = fptosi float %386 to i32
+ %388 = bitcast i32 %383 to float
+ %389 = bitcast i32 %385 to float
+ %390 = bitcast i32 %387 to float
+ %391 = bitcast float %389 to i32
+ %392 = bitcast float %390 to i32
+ %393 = or i32 %391, %392
+ %394 = bitcast i32 %393 to float
+ %395 = bitcast float %388 to i32
+ %396 = bitcast float %394 to i32
+ %397 = or i32 %395, %396
+ %398 = bitcast i32 %397 to float
+ %399 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %400 = extractelement <4 x float> %399, i32 0
+ %401 = fcmp une float 0xC018E2EB20000000, %400
+ %402 = select i1 %401, float 1.000000e+00, float 0.000000e+00
+ %403 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %404 = extractelement <4 x float> %403, i32 1
+ %405 = fcmp une float 0xBFEA8DB8C0000000, %404
+ %406 = select i1 %405, float 1.000000e+00, float 0.000000e+00
+ %407 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %408 = extractelement <4 x float> %407, i32 2
+ %409 = fcmp une float 0x4015236E20000000, %408
+ %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00
+ %411 = fsub float -0.000000e+00, %402
+ %412 = fptosi float %411 to i32
+ %413 = fsub float -0.000000e+00, %406
+ %414 = fptosi float %413 to i32
+ %415 = fsub float -0.000000e+00, %410
+ %416 = fptosi float %415 to i32
+ %417 = bitcast i32 %412 to float
+ %418 = bitcast i32 %414 to float
+ %419 = bitcast i32 %416 to float
+ %420 = bitcast float %418 to i32
+ %421 = bitcast float %419 to i32
+ %422 = or i32 %420, %421
+ %423 = bitcast i32 %422 to float
+ %424 = bitcast float %417 to i32
+ %425 = bitcast float %423 to i32
+ %426 = or i32 %424, %425
+ %427 = bitcast i32 %426 to float
+ %428 = insertelement <4 x float> undef, float %340, i32 0
+ %429 = insertelement <4 x float> %428, float %369, i32 1
+ %430 = insertelement <4 x float> %429, float %398, i32 2
+ %431 = insertelement <4 x float> %430, float %427, i32 3
+ %432 = insertelement <4 x float> undef, float %340, i32 0
+ %433 = insertelement <4 x float> %432, float %369, i32 1
+ %434 = insertelement <4 x float> %433, float %398, i32 2
+ %435 = insertelement <4 x float> %434, float %427, i32 3
+ %436 = call float @llvm.AMDGPU.dp4(<4 x float> %431, <4 x float> %435)
+ %437 = bitcast float %436 to i32
+ %438 = icmp ne i32 %437, 0
+ %439 = sext i1 %438 to i32
+ %440 = bitcast i32 %439 to float
+ %441 = bitcast float %440 to i32
+ %442 = xor i32 %441, -1
+ %443 = bitcast i32 %442 to float
+ %444 = load <4 x float> addrspace(8)* null
+ %445 = extractelement <4 x float> %444, i32 0
+ %446 = fcmp une float 0xC00574BC60000000, %445
+ %447 = select i1 %446, float 1.000000e+00, float 0.000000e+00
+ %448 = load <4 x float> addrspace(8)* null
+ %449 = extractelement <4 x float> %448, i32 1
+ %450 = fcmp une float 0x40210068E0000000, %449
+ %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00
+ %452 = load <4 x float> addrspace(8)* null
+ %453 = extractelement <4 x float> %452, i32 2
+ %454 = fcmp une float 0xBFC9A6B500000000, %453
+ %455 = select i1 %454, float 1.000000e+00, float 0.000000e+00
+ %456 = load <4 x float> addrspace(8)* null
+ %457 = extractelement <4 x float> %456, i32 3
+ %458 = fcmp une float 0xC0119BDA60000000, %457
+ %459 = select i1 %458, float 1.000000e+00, float 0.000000e+00
+ %460 = fsub float -0.000000e+00, %447
+ %461 = fptosi float %460 to i32
+ %462 = fsub float -0.000000e+00, %451
+ %463 = fptosi float %462 to i32
+ %464 = fsub float -0.000000e+00, %455
+ %465 = fptosi float %464 to i32
+ %466 = fsub float -0.000000e+00, %459
+ %467 = fptosi float %466 to i32
+ %468 = bitcast i32 %461 to float
+ %469 = bitcast i32 %463 to float
+ %470 = bitcast i32 %465 to float
+ %471 = bitcast i32 %467 to float
+ %472 = bitcast float %468 to i32
+ %473 = bitcast float %469 to i32
+ %474 = or i32 %472, %473
+ %475 = bitcast i32 %474 to float
+ %476 = bitcast float %470 to i32
+ %477 = bitcast float %471 to i32
+ %478 = or i32 %476, %477
+ %479 = bitcast i32 %478 to float
+ %480 = bitcast float %475 to i32
+ %481 = bitcast float %479 to i32
+ %482 = or i32 %480, %481
+ %483 = bitcast i32 %482 to float
+ %484 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %485 = extractelement <4 x float> %484, i32 0
+ %486 = fcmp une float 0xC02085D640000000, %485
+ %487 = select i1 %486, float 1.000000e+00, float 0.000000e+00
+ %488 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %489 = extractelement <4 x float> %488, i32 1
+ %490 = fcmp une float 0xBFD7C1BDA0000000, %489
+ %491 = select i1 %490, float 1.000000e+00, float 0.000000e+00
+ %492 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %493 = extractelement <4 x float> %492, i32 2
+ %494 = fcmp une float 0x401E1D7DC0000000, %493
+ %495 = select i1 %494, float 1.000000e+00, float 0.000000e+00
+ %496 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %497 = extractelement <4 x float> %496, i32 3
+ %498 = fcmp une float 0xC019893740000000, %497
+ %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00
+ %500 = fsub float -0.000000e+00, %487
+ %501 = fptosi float %500 to i32
+ %502 = fsub float -0.000000e+00, %491
+ %503 = fptosi float %502 to i32
+ %504 = fsub float -0.000000e+00, %495
+ %505 = fptosi float %504 to i32
+ %506 = fsub float -0.000000e+00, %499
+ %507 = fptosi float %506 to i32
+ %508 = bitcast i32 %501 to float
+ %509 = bitcast i32 %503 to float
+ %510 = bitcast i32 %505 to float
+ %511 = bitcast i32 %507 to float
+ %512 = bitcast float %508 to i32
+ %513 = bitcast float %509 to i32
+ %514 = or i32 %512, %513
+ %515 = bitcast i32 %514 to float
+ %516 = bitcast float %510 to i32
+ %517 = bitcast float %511 to i32
+ %518 = or i32 %516, %517
+ %519 = bitcast i32 %518 to float
+ %520 = bitcast float %515 to i32
+ %521 = bitcast float %519 to i32
+ %522 = or i32 %520, %521
+ %523 = bitcast i32 %522 to float
+ %524 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %525 = extractelement <4 x float> %524, i32 0
+ %526 = fcmp une float 0x40220F0D80000000, %525
+ %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00
+ %528 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %529 = extractelement <4 x float> %528, i32 1
+ %530 = fcmp une float 0xC018E2EB20000000, %529
+ %531 = select i1 %530, float 1.000000e+00, float 0.000000e+00
+ %532 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %533 = extractelement <4 x float> %532, i32 2
+ %534 = fcmp une float 0xBFEA8DB8C0000000, %533
+ %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00
+ %536 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %537 = extractelement <4 x float> %536, i32 3
+ %538 = fcmp une float 0x4015236E20000000, %537
+ %539 = select i1 %538, float 1.000000e+00, float 0.000000e+00
+ %540 = fsub float -0.000000e+00, %527
+ %541 = fptosi float %540 to i32
+ %542 = fsub float -0.000000e+00, %531
+ %543 = fptosi float %542 to i32
+ %544 = fsub float -0.000000e+00, %535
+ %545 = fptosi float %544 to i32
+ %546 = fsub float -0.000000e+00, %539
+ %547 = fptosi float %546 to i32
+ %548 = bitcast i32 %541 to float
+ %549 = bitcast i32 %543 to float
+ %550 = bitcast i32 %545 to float
+ %551 = bitcast i32 %547 to float
+ %552 = bitcast float %548 to i32
+ %553 = bitcast float %549 to i32
+ %554 = or i32 %552, %553
+ %555 = bitcast i32 %554 to float
+ %556 = bitcast float %550 to i32
+ %557 = bitcast float %551 to i32
+ %558 = or i32 %556, %557
+ %559 = bitcast i32 %558 to float
+ %560 = bitcast float %555 to i32
+ %561 = bitcast float %559 to i32
+ %562 = or i32 %560, %561
+ %563 = bitcast i32 %562 to float
+ %564 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %565 = extractelement <4 x float> %564, i32 0
+ %566 = fcmp une float 0x4016ED5D00000000, %565
+ %567 = select i1 %566, float 1.000000e+00, float 0.000000e+00
+ %568 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %569 = extractelement <4 x float> %568, i32 1
+ %570 = fcmp une float 0x402332FEC0000000, %569
+ %571 = select i1 %570, float 1.000000e+00, float 0.000000e+00
+ %572 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %573 = extractelement <4 x float> %572, i32 2
+ %574 = fcmp une float 0xC01484B5E0000000, %573
+ %575 = select i1 %574, float 1.000000e+00, float 0.000000e+00
+ %576 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %577 = extractelement <4 x float> %576, i32 3
+ %578 = fcmp une float 0x400179A6C0000000, %577
+ %579 = select i1 %578, float 1.000000e+00, float 0.000000e+00
+ %580 = fsub float -0.000000e+00, %567
+ %581 = fptosi float %580 to i32
+ %582 = fsub float -0.000000e+00, %571
+ %583 = fptosi float %582 to i32
+ %584 = fsub float -0.000000e+00, %575
+ %585 = fptosi float %584 to i32
+ %586 = fsub float -0.000000e+00, %579
+ %587 = fptosi float %586 to i32
+ %588 = bitcast i32 %581 to float
+ %589 = bitcast i32 %583 to float
+ %590 = bitcast i32 %585 to float
+ %591 = bitcast i32 %587 to float
+ %592 = bitcast float %588 to i32
+ %593 = bitcast float %589 to i32
+ %594 = or i32 %592, %593
+ %595 = bitcast i32 %594 to float
+ %596 = bitcast float %590 to i32
+ %597 = bitcast float %591 to i32
+ %598 = or i32 %596, %597
+ %599 = bitcast i32 %598 to float
+ %600 = bitcast float %595 to i32
+ %601 = bitcast float %599 to i32
+ %602 = or i32 %600, %601
+ %603 = bitcast i32 %602 to float
+ %604 = insertelement <4 x float> undef, float %483, i32 0
+ %605 = insertelement <4 x float> %604, float %523, i32 1
+ %606 = insertelement <4 x float> %605, float %563, i32 2
+ %607 = insertelement <4 x float> %606, float %603, i32 3
+ %608 = insertelement <4 x float> undef, float %483, i32 0
+ %609 = insertelement <4 x float> %608, float %523, i32 1
+ %610 = insertelement <4 x float> %609, float %563, i32 2
+ %611 = insertelement <4 x float> %610, float %603, i32 3
+ %612 = call float @llvm.AMDGPU.dp4(<4 x float> %607, <4 x float> %611)
+ %613 = bitcast float %612 to i32
+ %614 = icmp ne i32 %613, 0
+ %615 = sext i1 %614 to i32
+ %616 = bitcast i32 %615 to float
+ %617 = bitcast float %616 to i32
+ %618 = xor i32 %617, -1
+ %619 = bitcast i32 %618 to float
+ %620 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %621 = extractelement <4 x float> %620, i32 0
+ %622 = fcmp une float 0x40210068E0000000, %621
+ %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00
+ %624 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %625 = extractelement <4 x float> %624, i32 1
+ %626 = fcmp une float 0xBFC9A6B500000000, %625
+ %627 = select i1 %626, float 1.000000e+00, float 0.000000e+00
+ %628 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %629 = extractelement <4 x float> %628, i32 2
+ %630 = fcmp une float 0xC0119BDA60000000, %629
+ %631 = select i1 %630, float 1.000000e+00, float 0.000000e+00
+ %632 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %633 = extractelement <4 x float> %632, i32 3
+ %634 = fcmp une float 0xC02085D640000000, %633
+ %635 = select i1 %634, float 1.000000e+00, float 0.000000e+00
+ %636 = fsub float -0.000000e+00, %623
+ %637 = fptosi float %636 to i32
+ %638 = fsub float -0.000000e+00, %627
+ %639 = fptosi float %638 to i32
+ %640 = fsub float -0.000000e+00, %631
+ %641 = fptosi float %640 to i32
+ %642 = fsub float -0.000000e+00, %635
+ %643 = fptosi float %642 to i32
+ %644 = bitcast i32 %637 to float
+ %645 = bitcast i32 %639 to float
+ %646 = bitcast i32 %641 to float
+ %647 = bitcast i32 %643 to float
+ %648 = bitcast float %644 to i32
+ %649 = bitcast float %645 to i32
+ %650 = or i32 %648, %649
+ %651 = bitcast i32 %650 to float
+ %652 = bitcast float %646 to i32
+ %653 = bitcast float %647 to i32
+ %654 = or i32 %652, %653
+ %655 = bitcast i32 %654 to float
+ %656 = bitcast float %651 to i32
+ %657 = bitcast float %655 to i32
+ %658 = or i32 %656, %657
+ %659 = bitcast i32 %658 to float
+ %660 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %661 = extractelement <4 x float> %660, i32 0
+ %662 = fcmp une float 0xBFD7C1BDA0000000, %661
+ %663 = select i1 %662, float 1.000000e+00, float 0.000000e+00
+ %664 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %665 = extractelement <4 x float> %664, i32 1
+ %666 = fcmp une float 0x401E1D7DC0000000, %665
+ %667 = select i1 %666, float 1.000000e+00, float 0.000000e+00
+ %668 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %669 = extractelement <4 x float> %668, i32 2
+ %670 = fcmp une float 0xC019893740000000, %669
+ %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00
+ %672 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %673 = extractelement <4 x float> %672, i32 3
+ %674 = fcmp une float 0x40220F0D80000000, %673
+ %675 = select i1 %674, float 1.000000e+00, float 0.000000e+00
+ %676 = fsub float -0.000000e+00, %663
+ %677 = fptosi float %676 to i32
+ %678 = fsub float -0.000000e+00, %667
+ %679 = fptosi float %678 to i32
+ %680 = fsub float -0.000000e+00, %671
+ %681 = fptosi float %680 to i32
+ %682 = fsub float -0.000000e+00, %675
+ %683 = fptosi float %682 to i32
+ %684 = bitcast i32 %677 to float
+ %685 = bitcast i32 %679 to float
+ %686 = bitcast i32 %681 to float
+ %687 = bitcast i32 %683 to float
+ %688 = bitcast float %684 to i32
+ %689 = bitcast float %685 to i32
+ %690 = or i32 %688, %689
+ %691 = bitcast i32 %690 to float
+ %692 = bitcast float %686 to i32
+ %693 = bitcast float %687 to i32
+ %694 = or i32 %692, %693
+ %695 = bitcast i32 %694 to float
+ %696 = bitcast float %691 to i32
+ %697 = bitcast float %695 to i32
+ %698 = or i32 %696, %697
+ %699 = bitcast i32 %698 to float
+ %700 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %701 = extractelement <4 x float> %700, i32 0
+ %702 = fcmp une float 0xC018E2EB20000000, %701
+ %703 = select i1 %702, float 1.000000e+00, float 0.000000e+00
+ %704 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %705 = extractelement <4 x float> %704, i32 1
+ %706 = fcmp une float 0xBFEA8DB8C0000000, %705
+ %707 = select i1 %706, float 1.000000e+00, float 0.000000e+00
+ %708 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %709 = extractelement <4 x float> %708, i32 2
+ %710 = fcmp une float 0x4015236E20000000, %709
+ %711 = select i1 %710, float 1.000000e+00, float 0.000000e+00
+ %712 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %713 = extractelement <4 x float> %712, i32 3
+ %714 = fcmp une float 0x4016ED5D00000000, %713
+ %715 = select i1 %714, float 1.000000e+00, float 0.000000e+00
+ %716 = fsub float -0.000000e+00, %703
+ %717 = fptosi float %716 to i32
+ %718 = fsub float -0.000000e+00, %707
+ %719 = fptosi float %718 to i32
+ %720 = fsub float -0.000000e+00, %711
+ %721 = fptosi float %720 to i32
+ %722 = fsub float -0.000000e+00, %715
+ %723 = fptosi float %722 to i32
+ %724 = bitcast i32 %717 to float
+ %725 = bitcast i32 %719 to float
+ %726 = bitcast i32 %721 to float
+ %727 = bitcast i32 %723 to float
+ %728 = bitcast float %724 to i32
+ %729 = bitcast float %725 to i32
+ %730 = or i32 %728, %729
+ %731 = bitcast i32 %730 to float
+ %732 = bitcast float %726 to i32
+ %733 = bitcast float %727 to i32
+ %734 = or i32 %732, %733
+ %735 = bitcast i32 %734 to float
+ %736 = bitcast float %731 to i32
+ %737 = bitcast float %735 to i32
+ %738 = or i32 %736, %737
+ %739 = bitcast i32 %738 to float
+ %740 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %741 = extractelement <4 x float> %740, i32 0
+ %742 = fcmp une float 0x402332FEC0000000, %741
+ %743 = select i1 %742, float 1.000000e+00, float 0.000000e+00
+ %744 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %745 = extractelement <4 x float> %744, i32 1
+ %746 = fcmp une float 0xC01484B5E0000000, %745
+ %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00
+ %748 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %749 = extractelement <4 x float> %748, i32 2
+ %750 = fcmp une float 0x400179A6C0000000, %749
+ %751 = select i1 %750, float 1.000000e+00, float 0.000000e+00
+ %752 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %753 = extractelement <4 x float> %752, i32 3
+ %754 = fcmp une float 0xBFEE752540000000, %753
+ %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00
+ %756 = fsub float -0.000000e+00, %743
+ %757 = fptosi float %756 to i32
+ %758 = fsub float -0.000000e+00, %747
+ %759 = fptosi float %758 to i32
+ %760 = fsub float -0.000000e+00, %751
+ %761 = fptosi float %760 to i32
+ %762 = fsub float -0.000000e+00, %755
+ %763 = fptosi float %762 to i32
+ %764 = bitcast i32 %757 to float
+ %765 = bitcast i32 %759 to float
+ %766 = bitcast i32 %761 to float
+ %767 = bitcast i32 %763 to float
+ %768 = bitcast float %764 to i32
+ %769 = bitcast float %765 to i32
+ %770 = or i32 %768, %769
+ %771 = bitcast i32 %770 to float
+ %772 = bitcast float %766 to i32
+ %773 = bitcast float %767 to i32
+ %774 = or i32 %772, %773
+ %775 = bitcast i32 %774 to float
+ %776 = bitcast float %771 to i32
+ %777 = bitcast float %775 to i32
+ %778 = or i32 %776, %777
+ %779 = bitcast i32 %778 to float
+ %780 = insertelement <4 x float> undef, float %659, i32 0
+ %781 = insertelement <4 x float> %780, float %699, i32 1
+ %782 = insertelement <4 x float> %781, float %739, i32 2
+ %783 = insertelement <4 x float> %782, float %779, i32 3
+ %784 = insertelement <4 x float> undef, float %659, i32 0
+ %785 = insertelement <4 x float> %784, float %699, i32 1
+ %786 = insertelement <4 x float> %785, float %739, i32 2
+ %787 = insertelement <4 x float> %786, float %779, i32 3
+ %788 = call float @llvm.AMDGPU.dp4(<4 x float> %783, <4 x float> %787)
+ %789 = bitcast float %788 to i32
+ %790 = icmp ne i32 %789, 0
+ %791 = sext i1 %790 to i32
+ %792 = bitcast i32 %791 to float
+ %793 = bitcast float %792 to i32
+ %794 = xor i32 %793, -1
+ %795 = bitcast i32 %794 to float
+ %796 = bitcast float %91 to i32
+ %797 = bitcast float %179 to i32
+ %798 = and i32 %796, %797
+ %799 = bitcast i32 %798 to float
+ %800 = bitcast float %311 to i32
+ %801 = bitcast float %443 to i32
+ %802 = and i32 %800, %801
+ %803 = bitcast i32 %802 to float
+ %804 = bitcast float %799 to i32
+ %805 = bitcast float %803 to i32
+ %806 = and i32 %804, %805
+ %807 = bitcast i32 %806 to float
+ %808 = bitcast float %619 to i32
+ %809 = bitcast float %795 to i32
+ %810 = and i32 %808, %809
+ %811 = bitcast i32 %810 to float
+ %812 = bitcast float %807 to i32
+ %813 = bitcast float %811 to i32
+ %814 = and i32 %812, %813
+ %815 = bitcast i32 %814 to float
+ %816 = bitcast float %815 to i32
+ %817 = icmp ne i32 %816, 0
+ %. = select i1 %817, float 1.000000e+00, float 0.000000e+00
+ %.32 = select i1 %817, float 0.000000e+00, float 1.000000e+00
+ %818 = insertelement <4 x float> undef, float %0, i32 0
+ %819 = insertelement <4 x float> %818, float %1, i32 1
+ %820 = insertelement <4 x float> %819, float %2, i32 2
+ %821 = insertelement <4 x float> %820, float %3, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %821, i32 60, i32 1)
+ %822 = insertelement <4 x float> undef, float %.32, i32 0
+ %823 = insertelement <4 x float> %822, float %., i32 1
+ %824 = insertelement <4 x float> %823, float 0.000000e+00, i32 2
+ %825 = insertelement <4 x float> %824, float 1.000000e+00, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %825, i32 0, i32 2)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) #1
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.v4i32.ll
new file mode 100644
index 000000000000..662085e2d673
--- /dev/null
+++ b/test/CodeGen/R600/and.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = and <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
new file mode 100644
index 000000000000..fd958b365961
--- /dev/null
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -0,0 +1,36 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test is for a bug in
+; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where
+; the wrong type was being passed to
+; TargetLowering::getOperationAction() when checking the legality of
+; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes.
+
+
+; CHECK: @sint
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %sint = load i32 addrspace(1) * %in
+ %conv = sitofp i32 %sint to float
+ %0 = insertelement <4 x float> undef, float %conv, i32 0
+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %splat, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+;CHECK: @uint
+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %uint = load i32 addrspace(1) * %in
+ %conv = uitofp i32 %uint to float
+ %0 = insertelement <4 x float> undef, float %conv, i32 0
+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %splat, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
new file mode 100644
index 000000000000..09baee7a1dcd
--- /dev/null
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; PRED_SET* instructions must be tied to any instruction that uses their
+; result. This tests that there are no instructions between the PRED_SET*
+; and the PREDICATE_BREAK in this loop.
+
+; CHECK: @loop_ge
+; CHECK: LOOP_START_DX10
+; CHECK: PRED_SET
+; CHECK-NEXT: JUMP
+; CHECK-NEXT: LOOP_BREAK
+define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
+entry:
+ %cmp5 = icmp sgt i32 %iterations, 0
+ br i1 %cmp5, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
+ %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %i.07 = add nsw i32 %i.07.in, -1
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+ store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
+ %add = add nsw i32 %ai.06, 1
+ %exitcond = icmp eq i32 %add, %iterations
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
new file mode 100644
index 000000000000..0407533eaa5f
--- /dev/null
+++ b/test/CodeGen/R600/fabs.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @fabs( float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @fabs(float ) readnone
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
new file mode 100644
index 000000000000..d7d1b6572c41
--- /dev/null
+++ b/test/CodeGen/R600/fadd.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fadd float %r0, %r1
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll
new file mode 100644
index 000000000000..85dbfd52cbb3
--- /dev/null
+++ b/test/CodeGen/R600/fadd.v4f32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1) * %in
+ %b = load <4 x float> addrspace(1) * %b_ptr
+ %result = fadd <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
new file mode 100644
index 000000000000..a94cfb5cf2fe
--- /dev/null
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;Not checking arguments 2 and 3 to CNDE, because they may change between
+;registers and literal.x depending on what the optimizer does.
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %0 = load float addrspace(1)* %in
+ %cmp = fcmp oeq float %0, 0.000000e+00
+ %value = select i1 %cmp, i32 2, i32 3
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll
new file mode 100644
index 000000000000..55aba0d72d39
--- /dev/null
+++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the
+; chance to optimize the fcmp + select instructions to SET* was missed
+; due to the fact that the operands to fcmp and select had different types
+
+; CHECK: SET{{[A-Z]+}}_DX10
+
+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %0 = load float addrspace(1)* %in
+ %cmp = fcmp oeq float %0, 0.000000e+00
+ %value = select i1 %cmp, i32 -1, i32 0
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
new file mode 100644
index 000000000000..37f621d23958
--- /dev/null
+++ b/test/CodeGen/R600/fcmp.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fcmp_sext
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %0 = load float addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1
+ %1 = load float addrspace(1)* %arrayidx1
+ %cmp = fcmp oeq float %0, %1
+ %sext = sext i1 %cmp to i32
+ store i32 %sext, i32 addrspace(1)* %out
+ ret void
+}
+
+; This test checks that a setcc node with f32 operands is lowered to a
+; SET*_DX10 instruction. Previously we were lowering this to:
+; SET* + FP_TO_SINT
+
+; CHECK: @fcmp_br
+; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0
+
+define void @fcmp_br(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp oeq float %in, 5.0
+ br i1 %0, label %IF, label %ENDIF
+
+IF:
+ %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %1
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll
new file mode 100644
index 000000000000..79e677f541f5
--- /dev/null
+++ b/test/CodeGen/R600/fdiv.v4f32.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1) * %in
+ %b = load <4 x float> addrspace(1) * %b_ptr
+ %result = fdiv <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll
new file mode 100644
index 000000000000..845330f28419
--- /dev/null
+++ b/test/CodeGen/R600/floor.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @floor(float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @floor(float) readonly
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll
new file mode 100644
index 000000000000..a3d4d0ff0db7
--- /dev/null
+++ b/test/CodeGen/R600/fmad.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = call float @llvm.R600.load.input(i32 2)
+ %r3 = fmul float %r0, %r1
+ %r4 = fadd float %r3, %r2
+ call void @llvm.AMDGPU.store.output(float %r4, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @fabs(float ) readnone
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
new file mode 100644
index 000000000000..3708f0b9eed2
--- /dev/null
+++ b/test/CodeGen/R600/fmax.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fcmp uge float %r0, %r1
+ %r3 = select i1 %r2, float %r0, float %r1
+ call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll
new file mode 100644
index 000000000000..19d59ab3061e
--- /dev/null
+++ b/test/CodeGen/R600/fmin.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fcmp uge float %r0, %r1
+ %r3 = select i1 %r2, float %r1, float %r0
+ call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
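
The fmax.ll and fmin.ll tests above feed the same fcmp uge + select idiom to the backend; only the order of the select operands differs, which is what turns one into MAX and the other into MIN. A tiny sketch of the two idioms (NaN behaviour is ignored, as in the tests):

def max_idiom(a, b):
    # fmax.ll: select (a >= b), a, b  ==> MAX
    return a if a >= b else b

def min_idiom(a, b):
    # fmin.ll: same compare, select operands swapped  ==> MIN
    return b if a >= b else a

assert (max_idiom(1.0, 2.0), min_idiom(1.0, 2.0)) == (2.0, 1.0)
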
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
new file mode 100644
index 000000000000..eb1d523c0bb4
--- /dev/null
+++ b/test/CodeGen/R600/fmul.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fmul float %r0, %r1
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll
new file mode 100644
index 000000000000..6d44a0c5c782
--- /dev/null
+++ b/test/CodeGen/R600/fmul.v4f32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1) * %in
+ %b = load <4 x float> addrspace(1) * %b_ptr
+ %result = fmul <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
new file mode 100644
index 000000000000..591aa52676a4
--- /dev/null
+++ b/test/CodeGen/R600/fsub.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fsub float %r0, %r1
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll
new file mode 100644
index 000000000000..612a57e4b609
--- /dev/null
+++ b/test/CodeGen/R600/fsub.v4f32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1) * %in
+ %b = load <4 x float> addrspace(1) * %b_ptr
+ %result = fsub <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8_to_double_to_float.ll
new file mode 100644
index 000000000000..39f33227fa4b
--- /dev/null
+++ b/test/CodeGen/R600/i8_to_double_to_float.ll
@@ -0,0 +1,11 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %1 = load i8 addrspace(1)* %in
+ %2 = uitofp i8 %1 to double
+ %3 = fptrunc double %2 to float
+ store float %3, float addrspace(1)* %out
+ ret void
+}
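
The test above checks that uitofp i8 -> double followed by fptrunc to float is folded into a single UINT_TO_FLT. The fold is exact because every value 0..255 is representable in single precision, so the detour through double cannot change the result; a quick exhaustive check in plain Python:

import struct

def f32(v):
    # round a Python float (binary64) to IEEE single precision and back
    return struct.unpack('<f', struct.pack('<f', v))[0]

for x in range(256):
    via_double = f32(float(x))   # uitofp i8 -> double (exact), then fptrunc -> float
    assert via_double == x       # identical to converting the i8 directly
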
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
new file mode 100644
index 000000000000..71705a64f50e
--- /dev/null
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -0,0 +1,18 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;Test that a select with reversed True/False values is correctly lowered
+;to a SETNE_INT. There should only be one SETNE_INT instruction.
+
+;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NOT: SETNE_INT
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %0 = load i32 addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
+ %1 = load i32 addrspace(1)* %arrayidx1
+ %cmp = icmp eq i32 %0, %1
+ %value = select i1 %cmp, i32 0, i32 -1
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
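
The equivalence the test above relies on is that select (a == b), 0, -1 has the same value as a != b sign-extended to i32, so the backend can flip the condition and emit a single SETNE_INT rather than a compare plus a conditional move. A short check of that identity:

def select_eq_reversed(a, b):
    # IR in the test: %cmp = icmp eq ; %value = select %cmp, 0, -1
    return 0 if a == b else -1

def setne_int(a, b):
    # what the CHECK line expects: (a != b) sign-extended to 0 / -1
    return -1 if a != b else 0

assert all(select_eq_reversed(a, b) == setne_int(a, b)
           for a in range(-2, 3) for b in range(-2, 3))
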
diff --git a/test/CodeGen/R600/jump_address.ll b/test/CodeGen/R600/jump_address.ll
new file mode 100644
index 000000000000..cd35bffb1304
--- /dev/null
+++ b/test/CodeGen/R600/jump_address.ll
@@ -0,0 +1,50 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: JUMP @4
+
+define void @main() #0 {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = bitcast float %1 to i32
+ %3 = icmp eq i32 %2, 0
+ %4 = sext i1 %3 to i32
+ %5 = bitcast i32 %4 to float
+ %6 = bitcast float %5 to i32
+ %7 = icmp ne i32 %6, 0
+ br i1 %7, label %ENDIF, label %ELSE
+
+ELSE: ; preds = %main_body
+ %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %9 = extractelement <4 x float> %8, i32 0
+ %10 = bitcast float %9 to i32
+ %11 = icmp eq i32 %10, 1
+ %12 = sext i1 %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %13 to i32
+ %15 = icmp ne i32 %14, 0
+ br i1 %15, label %IF13, label %ENDIF
+
+ENDIF: ; preds = %IF13, %ELSE, %main_body
+ %temp.0 = phi float [ 0xFFF8000000000000, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp2.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %temp3.0 = phi float [ 5.000000e-01, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %16 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
+ %18 = insertelement <4 x float> %17, float %temp2.0, i32 2
+ %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
+ ret void
+
+IF13: ; preds = %ELSE
+ %20 = load <4 x float> addrspace(8)* null
+ %21 = extractelement <4 x float> %20, i32 0
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fadd float 0xFFF8000000000000, %22
+ br label %ENDIF
+}
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
new file mode 100644
index 000000000000..3d70e4bd54aa
--- /dev/null
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -0,0 +1,100 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @main1
+; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}}
+define void @main1() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* null
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = fcmp ult float %1, 0.000000e+00
+ %7 = select i1 %6, float %3, float %5
+ %8 = load <4 x float> addrspace(8)* null
+ %9 = extractelement <4 x float> %8, i32 1
+ %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %11 = extractelement <4 x float> %10, i32 1
+ %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %13 = extractelement <4 x float> %12, i32 1
+ %14 = fcmp ult float %9, 0.000000e+00
+ %15 = select i1 %14, float %11, float %13
+ %16 = load <4 x float> addrspace(8)* null
+ %17 = extractelement <4 x float> %16, i32 2
+ %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %19 = extractelement <4 x float> %18, i32 2
+ %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %21 = extractelement <4 x float> %20, i32 2
+ %22 = fcmp ult float %17, 0.000000e+00
+ %23 = select i1 %22, float %19, float %21
+ %24 = load <4 x float> addrspace(8)* null
+ %25 = extractelement <4 x float> %24, i32 3
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %27 = extractelement <4 x float> %26, i32 3
+ %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %29 = extractelement <4 x float> %28, i32 3
+ %30 = fcmp ult float %25, 0.000000e+00
+ %31 = select i1 %30, float %27, float %29
+ %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+ %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
+ %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
+ %36 = insertelement <4 x float> undef, float %32, i32 0
+ %37 = insertelement <4 x float> %36, float %33, i32 1
+ %38 = insertelement <4 x float> %37, float %34, i32 2
+ %39 = insertelement <4 x float> %38, float %35, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
+ ret void
+}
+
+; CHECK: @main2
+; CHECK-NOT: MOV
+define void @main2() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* null
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %5 = extractelement <4 x float> %4, i32 1
+ %6 = fcmp ult float %1, 0.000000e+00
+ %7 = select i1 %6, float %3, float %5
+ %8 = load <4 x float> addrspace(8)* null
+ %9 = extractelement <4 x float> %8, i32 1
+ %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %11 = extractelement <4 x float> %10, i32 0
+ %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %13 = extractelement <4 x float> %12, i32 1
+ %14 = fcmp ult float %9, 0.000000e+00
+ %15 = select i1 %14, float %11, float %13
+ %16 = load <4 x float> addrspace(8)* null
+ %17 = extractelement <4 x float> %16, i32 2
+ %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %19 = extractelement <4 x float> %18, i32 3
+ %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %21 = extractelement <4 x float> %20, i32 2
+ %22 = fcmp ult float %17, 0.000000e+00
+ %23 = select i1 %22, float %19, float %21
+ %24 = load <4 x float> addrspace(8)* null
+ %25 = extractelement <4 x float> %24, i32 3
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %27 = extractelement <4 x float> %26, i32 3
+ %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %29 = extractelement <4 x float> %28, i32 2
+ %30 = fcmp ult float %25, 0.000000e+00
+ %31 = select i1 %30, float %27, float %29
+ %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+ %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
+ %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
+ %36 = insertelement <4 x float> undef, float %32, i32 0
+ %37 = insertelement <4 x float> %36, float %33, i32 1
+ %38 = insertelement <4 x float> %37, float %34, i32 2
+ %39 = insertelement <4 x float> %38, float %35, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
+ ret void
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
new file mode 100644
index 000000000000..1aae7f9f91f4
--- /dev/null
+++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This tests a bug where LegalizeDAG was not checking the target's
+; BooleanContents value and always using one for true, when expanding
+; setcc to select_cc.
+;
+; This bug caused the icmp IR instruction to be expanded to two machine
+; instructions, when only one is needed.
+;
+
+; CHECK: @setcc_expand
+; CHECK: SET
+; CHECK-NOT: CND
+define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp eq i32 %in, 5
+ br i1 %0, label %IF, label %ENDIF
+IF:
+ %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %1
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
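
Roughly, the expansion in question rewrites setcc a, b, cc into select_cc a, b, TrueConst, 0, cc. On a target whose native compares already produce an all-ones (-1) boolean, hard-coding TrueConst = 1 forces an extra instruction to massage the value, while TrueConst = -1 lets the select_cc collapse into the compare itself, which is what the single SET in the CHECK lines above corresponds to. A small sketch of that difference (the -1 convention is inferred from the comment, not stated in this diff):

def native_compare(a, b):
    # assumed R600-style compare result: all-ones for true
    return -1 if a == b else 0

def expand_setcc(a, b, true_const):
    # LegalizeDAG's expansion: select_cc a, b, true_const, 0
    return true_const if a == b else 0

# With true_const = -1 the expansion is exactly the native compare (one instruction);
# with true_const = 1 an extra op is needed to turn -1 into 1.
assert expand_setcc(3, 3, -1) == native_compare(3, 3)
assert expand_setcc(3, 3, 1) == -native_compare(3, 3)
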
diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg
new file mode 100644
index 000000000000..36ee493e5945
--- /dev/null
+++ b/test/CodeGen/R600/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'R600' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
new file mode 100644
index 000000000000..e69f64e0e142
--- /dev/null
+++ b/test/CodeGen/R600/literals.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Test using an integer literal constant.
+; Generated ASM should be:
+; ADD_INT REG literal.x, 5
+; or
+; ADD_INT literal.x REG, 5
+
+; CHECK: @i32_literal
+; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
+define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = add i32 5, %in
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test using a float literal constant.
+; Generated ASM should be:
+; ADD REG literal.x, 5.0
+; or
+; ADD literal.x REG, 5.0
+
+; CHECK: @float_literal
+; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0
+define void @float_literal(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fadd float 5.0, %in
+ store float %0, float addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
new file mode 100644
index 000000000000..693eb27457c2
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
@@ -0,0 +1,17 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.AMDGPU.mul(float ,float ) readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
new file mode 100644
index 000000000000..74331fa26934
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -0,0 +1,42 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %addr = load <4 x float> addrspace(1)* %in
+ %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
+ %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
+ %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
+ %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res3, i32 0, i32 0, i32 4)
+ %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res4, i32 0, i32 0, i32 5)
+ %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res5, i32 0, i32 0, i32 6)
+ %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res6, i32 0, i32 0, i32 7)
+ %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res7, i32 0, i32 0, i32 8)
+ %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res8, i32 0, i32 0, i32 9)
+ %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res9, i32 0, i32 0, i32 10)
+ %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res10, i32 0, i32 0, i32 11)
+ %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res11, i32 0, i32 0, i32 12)
+ %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res12, i32 0, i32 0, i32 13)
+ %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res13, i32 0, i32 0, i32 14)
+ %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res14, i32 0, i32 0, i32 15)
+ %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res15, i32 0, i32 0, i32 16)
+ store <4 x float> %res16, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
new file mode 100644
index 000000000000..fac957f7eeec
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.AMDGPU.trunc( float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.AMDGPU.trunc(float ) readnone
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
new file mode 100644
index 000000000000..bf0cdaa2fa3a
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: S_MOV_B32
+;CHECK-NEXT: V_INTERP_MOV_F32
+
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+ %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
+ %5 = call i32 @llvm.SI.packf16(float %4, float %4)
+ %6 = bitcast i32 %5 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
+ ret void
+}
+
+declare void @llvm.AMDGPU.shader.type(i32)
+
+declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
+
+declare i32 @llvm.SI.packf16(float, float) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
new file mode 100644
index 000000000000..c724395b98c2
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -0,0 +1,106 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+
+define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+ %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
+ %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
+ %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
+ %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
+ %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
+ %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
+ %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
+ %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
+ %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
+ %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
+ %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
+ %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
+ %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
+ %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
+ %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
+ %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
+ %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v1,
+ <8 x i32> undef, <4 x i32> undef, i32 1)
+ %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v2,
+ <8 x i32> undef, <4 x i32> undef, i32 2)
+ %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v3,
+ <8 x i32> undef, <4 x i32> undef, i32 3)
+ %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v4,
+ <8 x i32> undef, <4 x i32> undef, i32 4)
+ %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v5,
+ <8 x i32> undef, <4 x i32> undef, i32 5)
+ %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v6,
+ <8 x i32> undef, <4 x i32> undef, i32 6)
+ %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v7,
+ <8 x i32> undef, <4 x i32> undef, i32 7)
+ %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v8,
+ <8 x i32> undef, <4 x i32> undef, i32 8)
+ %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v9,
+ <8 x i32> undef, <4 x i32> undef, i32 9)
+ %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v10,
+ <8 x i32> undef, <4 x i32> undef, i32 10)
+ %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v11,
+ <8 x i32> undef, <4 x i32> undef, i32 11)
+ %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v12,
+ <8 x i32> undef, <4 x i32> undef, i32 12)
+ %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v13,
+ <8 x i32> undef, <4 x i32> undef, i32 13)
+ %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v14,
+ <8 x i32> undef, <4 x i32> undef, i32 14)
+ %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v15,
+ <8 x i32> undef, <4 x i32> undef, i32 15)
+ %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v16,
+ <8 x i32> undef, <4 x i32> undef, i32 16)
+ %e1 = extractelement <4 x float> %res1, i32 0
+ %e2 = extractelement <4 x float> %res2, i32 0
+ %e3 = extractelement <4 x float> %res3, i32 0
+ %e4 = extractelement <4 x float> %res4, i32 0
+ %e5 = extractelement <4 x float> %res5, i32 0
+ %e6 = extractelement <4 x float> %res6, i32 0
+ %e7 = extractelement <4 x float> %res7, i32 0
+ %e8 = extractelement <4 x float> %res8, i32 0
+ %e9 = extractelement <4 x float> %res9, i32 0
+ %e10 = extractelement <4 x float> %res10, i32 0
+ %e11 = extractelement <4 x float> %res11, i32 0
+ %e12 = extractelement <4 x float> %res12, i32 0
+ %e13 = extractelement <4 x float> %res13, i32 0
+ %e14 = extractelement <4 x float> %res14, i32 0
+ %e15 = extractelement <4 x float> %res15, i32 0
+ %e16 = extractelement <4 x float> %res16, i32 0
+ %s1 = fadd float %e1, %e2
+ %s2 = fadd float %s1, %e3
+ %s3 = fadd float %s2, %e4
+ %s4 = fadd float %s3, %e5
+ %s5 = fadd float %s4, %e6
+ %s6 = fadd float %s5, %e7
+ %s7 = fadd float %s6, %e8
+ %s8 = fadd float %s7, %e9
+ %s9 = fadd float %s8, %e10
+ %s10 = fadd float %s9, %e11
+ %s11 = fadd float %s10, %e12
+ %s12 = fadd float %s11, %e13
+ %s13 = fadd float %s12, %e14
+ %s14 = fadd float %s13, %e15
+ %s15 = fadd float %s14, %e16
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
new file mode 100644
index 000000000000..dc120bfb00c2
--- /dev/null
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.cos.f32(float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.cos.f32(float) readnone
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
new file mode 100644
index 000000000000..b4ce9f429f16
--- /dev/null
+++ b/test/CodeGen/R600/llvm.pow.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = call float @llvm.pow.f32( float %r0, float %r1)
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.pow.f32(float ,float ) readonly
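
The three CHECK lines above spell out the standard powf expansion: pow(x, y) = exp2(y * log2(x)), hence the LOG_IEEE, MUL and EXP_IEEE appearing back to back. A small numeric sanity check of that identity (only valid for x > 0):

import math

def pow_via_exp2_log2(x, y):
    # LOG_IEEE -> MUL -> EXP_IEEE, as checked above
    return 2.0 ** (y * math.log2(x))

for x, y in [(2.0, 10.0), (3.0, 0.5), (1.5, -2.0)]:
    assert math.isclose(pow_via_exp2_log2(x, y), math.pow(x, y), rel_tol=1e-12)
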
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
new file mode 100644
index 000000000000..5cd6998c9370
--- /dev/null
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.sin.f32( float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.sin.f32(float) readnone
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll
new file mode 100644
index 000000000000..93627283bb94
--- /dev/null
+++ b/test/CodeGen/R600/load.constant_addrspace.f32.ll
@@ -0,0 +1,9 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @test(float addrspace(1)* %out, float addrspace(2)* %in) {
+ %1 = load float addrspace(2)* %in
+ store float %1, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll
new file mode 100644
index 000000000000..b070dcd52049
--- /dev/null
+++ b/test/CodeGen/R600/load.i8.ll
@@ -0,0 +1,10 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %1 = load i8 addrspace(1)* %in
+ %2 = zext i8 %1 to i32
+ store i32 %2, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
new file mode 100644
index 000000000000..423adb9da900
--- /dev/null
+++ b/test/CodeGen/R600/lshl.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
+
+define void @test(i32 %p) {
+ %i = mul i32 %p, 2
+ %r = bitcast i32 %i to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
new file mode 100644
index 000000000000..551eac1d76bf
--- /dev/null
+++ b/test/CodeGen/R600/lshr.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
+
+define void @test(i32 %p) {
+ %i = udiv i32 %p, 2
+ %r = bitcast i32 %i to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
new file mode 100644
index 000000000000..28744e00c3cf
--- /dev/null
+++ b/test/CodeGen/R600/mulhu.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
+;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
+;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
+
+define void @test(i32 %p) {
+ %i = udiv i32 %p, 3
+ %r = bitcast i32 %i to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
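
The magic constant in the first CHECK line above is the usual reciprocal for unsigned division by three: -1431655765 is 0xAAAAAAAB as a signed 32-bit value, and floor(x / 3) equals the upper 32 bits of x * 0xAAAAAAAB shifted right by one more bit, exactly the V_MUL_HI_U32 / V_LSHRREV_B32 pair being checked. A short verification of that identity over the 32-bit range:

MAGIC = 0xAAAAAAAB   # printed as -1431655765 in the CHECK line

def udiv3(x):
    # V_MUL_HI_U32: upper 32 bits of the 64-bit product, then V_LSHRREV by 1
    return ((x * MAGIC) >> 32) >> 1

for x in (0, 1, 2, 3, 4, 1000, 2**31, 2**32 - 1):
    assert udiv3(x) == x // 3
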
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
new file mode 100644
index 000000000000..eb8b052b6f72
--- /dev/null
+++ b/test/CodeGen/R600/predicates.ll
@@ -0,0 +1,104 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests make sure the compiler is optimizing branches using predicates
+; when it is legal to do so.
+
+; CHECK: @simple_if
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+define void @simple_if(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF, label %ENDIF
+
+IF:
+ %1 = shl i32 %in, 1
+ br label %ENDIF
+
+ENDIF:
+ %2 = phi i32 [ %in, %entry ], [ %1, %IF ]
+ store i32 %2, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @simple_if_else
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF, label %ELSE
+
+IF:
+ %1 = shl i32 %in, 1
+ br label %ENDIF
+
+ELSE:
+ %2 = lshr i32 %in, 1
+ br label %ENDIF
+
+ENDIF:
+ %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ]
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @nested_if
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: POP
+define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF0, label %ENDIF
+
+IF0:
+ %1 = add i32 %in, 10
+ %2 = icmp sgt i32 %1, 0
+ br i1 %2, label %IF1, label %ENDIF
+
+IF1:
+ %3 = shl i32 %1, 1
+ br label %ENDIF
+
+ENDIF:
+ %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1]
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @nested_if_else
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: POP
+define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF0, label %ENDIF
+
+IF0:
+ %1 = add i32 %in, 10
+ %2 = icmp sgt i32 %1, 0
+ br i1 %2, label %IF1, label %ELSE1
+
+IF1:
+ %3 = shl i32 %1, 1
+ br label %ENDIF
+
+ELSE1:
+ %4 = lshr i32 %in, 1
+ br label %ENDIF
+
+ENDIF:
+ %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1]
+ store i32 %5, i32 addrspace(1)* %out
+ ret void
+}
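
What the Pred_sel CHECK lines above capture is that a short if-body does not need a jump: the instruction in the body executes under a predicate and its result is simply discarded when the predicate is false. A rough scalar model of that transformation (one lane, names illustrative):

def branchy(x):
    # the IR in @simple_if: branch around the shift
    if x > 0:
        x = x << 1
    return x

def predicated(x):
    # predicated form: the shift is always issued, its result is kept
    # only when the predicate (x > 0) holds -- no jump required
    shifted = x << 1
    return shifted if x > 0 else x

assert all(branchy(x) == predicated(x) for x in (-3, 0, 5))
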
diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll
new file mode 100644
index 000000000000..6838c1ae3662
--- /dev/null
+++ b/test/CodeGen/R600/reciprocal.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = fdiv float 1.0, %r0
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.AMDGPU.rcp(float ) readnone
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
new file mode 100644
index 000000000000..ba9620c40a49
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
@@ -0,0 +1,83 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = call float @llvm.R600.interp.input(i32 0, i32 0)
+ %1 = call float @llvm.R600.interp.input(i32 1, i32 0)
+ %2 = call float @llvm.R600.interp.input(i32 2, i32 0)
+ %3 = call float @llvm.R600.interp.input(i32 3, i32 0)
+ %4 = fcmp ult float %1, 0.000000e+00
+ %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
+ %6 = fsub float -0.000000e+00, %5
+ %7 = fptosi float %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = fcmp ult float %0, 5.700000e+01
+ %10 = select i1 %9, float 1.000000e+00, float 0.000000e+00
+ %11 = fsub float -0.000000e+00, %10
+ %12 = fptosi float %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %8 to i32
+ %15 = bitcast float %13 to i32
+ %16 = and i32 %14, %15
+ %17 = bitcast i32 %16 to float
+ %18 = bitcast float %17 to i32
+ %19 = icmp ne i32 %18, 0
+ %20 = fcmp ult float %0, 0.000000e+00
+ %21 = select i1 %20, float 1.000000e+00, float 0.000000e+00
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fptosi float %22 to i32
+ %24 = bitcast i32 %23 to float
+ %25 = bitcast float %24 to i32
+ %26 = icmp ne i32 %25, 0
+ br i1 %19, label %IF, label %ELSE
+
+IF: ; preds = %main_body
+ %. = select i1 %26, float 0.000000e+00, float 1.000000e+00
+ %.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00
+ br label %ENDIF
+
+ELSE: ; preds = %main_body
+ br i1 %26, label %ENDIF, label %ELSE17
+
+ENDIF: ; preds = %ELSE17, %ELSE, %IF
+ %temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ]
+ %temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ]
+ %temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ]
+ %27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00)
+ %28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00)
+ %29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00)
+ %30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %31 = insertelement <4 x float> undef, float %27, i32 0
+ %32 = insertelement <4 x float> %31, float %28, i32 1
+ %33 = insertelement <4 x float> %32, float %29, i32 2
+ %34 = insertelement <4 x float> %33, float %30, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0)
+ ret void
+
+ELSE17: ; preds = %ELSE
+ %35 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %36 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %37 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %38 = fadd float %35, 0x3FC99999A0000000
+ %39 = fadd float %36, 0x3FC99999A0000000
+ %40 = fadd float %37, 0x3FC99999A0000000
+ %41 = fadd float %38, 0x3FC99999A0000000
+ %42 = fadd float %39, 0x3FC99999A0000000
+ %43 = fadd float %40, 0x3FC99999A0000000
+ %44 = fadd float %41, 0x3FC99999A0000000
+ %45 = fadd float %42, 0x3FC99999A0000000
+ %46 = fadd float %43, 0x3FC99999A0000000
+ %47 = fadd float %44, 0x3FC99999A0000000
+ %48 = fadd float %45, 0x3FC99999A0000000
+ %49 = fadd float %46, 0x3FC99999A0000000
+ br label %ENDIF
+}
+
+declare float @llvm.R600.interp.input(i32, i32) #0
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll
new file mode 100644
index 000000000000..5e875c49ab51
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -0,0 +1,88 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(9)* null
+ %1 = extractelement <4 x float> %0, i32 3
+ %2 = fptosi float %1 to i32
+ %3 = bitcast i32 %2 to float
+ %4 = bitcast float %3 to i32
+ %5 = sdiv i32 %4, 4
+ %6 = bitcast i32 %5 to float
+ %7 = bitcast float %6 to i32
+ %8 = mul i32 %7, 4
+ %9 = bitcast i32 %8 to float
+ %10 = bitcast float %9 to i32
+ %11 = sub i32 0, %10
+ %12 = bitcast i32 %11 to float
+ %13 = bitcast float %3 to i32
+ %14 = bitcast float %12 to i32
+ %15 = add i32 %13, %14
+ %16 = bitcast i32 %15 to float
+ %17 = load <4 x float> addrspace(9)* null
+ %18 = extractelement <4 x float> %17, i32 0
+ %19 = load <4 x float> addrspace(9)* null
+ %20 = extractelement <4 x float> %19, i32 1
+ %21 = load <4 x float> addrspace(9)* null
+ %22 = extractelement <4 x float> %21, i32 2
+ br label %LOOP
+
+LOOP: ; preds = %IF31, %main_body
+ %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ]
+ %temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ]
+ %temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ]
+ %temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ]
+ %23 = bitcast float %temp12.0 to i32
+ %24 = bitcast float %6 to i32
+ %25 = icmp sge i32 %23, %24
+ %26 = sext i1 %25 to i32
+ %27 = bitcast i32 %26 to float
+ %28 = bitcast float %27 to i32
+ %29 = icmp ne i32 %28, 0
+ br i1 %29, label %IF, label %LOOP29
+
+IF: ; preds = %LOOP
+ %30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
+ %31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
+ %32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %34 = insertelement <4 x float> undef, float %30, i32 0
+ %35 = insertelement <4 x float> %34, float %31, i32 1
+ %36 = insertelement <4 x float> %35, float %32, i32 2
+ %37 = insertelement <4 x float> %36, float %33, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
+ ret void
+
+LOOP29: ; preds = %LOOP, %ENDIF30
+ %temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ]
+ %temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ]
+ %temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ]
+ %temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ]
+ %38 = bitcast float %temp20.0 to i32
+ %39 = bitcast float %16 to i32
+ %40 = icmp sge i32 %38, %39
+ %41 = sext i1 %40 to i32
+ %42 = bitcast i32 %41 to float
+ %43 = bitcast float %42 to i32
+ %44 = icmp ne i32 %43, 0
+ br i1 %44, label %IF31, label %ENDIF30
+
+IF31: ; preds = %LOOP29
+ %45 = bitcast float %temp12.0 to i32
+ %46 = add i32 %45, 1
+ %47 = bitcast i32 %46 to float
+ br label %LOOP
+
+ENDIF30: ; preds = %LOOP29
+ %48 = bitcast float %temp20.0 to i32
+ %49 = add i32 %48, 1
+ %50 = bitcast i32 %49 to float
+ br label %LOOP29
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll
new file mode 100644
index 000000000000..d142cacd4335
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop.ll
@@ -0,0 +1,55 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(9)* null
+ %1 = extractelement <4 x float> %0, i32 3
+ %2 = fptosi float %1 to i32
+ %3 = bitcast i32 %2 to float
+ %4 = load <4 x float> addrspace(9)* null
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = load <4 x float> addrspace(9)* null
+ %7 = extractelement <4 x float> %6, i32 1
+ %8 = load <4 x float> addrspace(9)* null
+ %9 = extractelement <4 x float> %8, i32 2
+ br label %LOOP
+
+LOOP: ; preds = %ENDIF, %main_body
+ %temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ]
+ %temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ]
+ %temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ]
+ %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ]
+ %10 = bitcast float %temp8.0 to i32
+ %11 = bitcast float %3 to i32
+ %12 = icmp sge i32 %10, %11
+ %13 = sext i1 %12 to i32
+ %14 = bitcast i32 %13 to float
+ %15 = bitcast float %14 to i32
+ %16 = icmp ne i32 %15, 0
+ br i1 %16, label %IF, label %ENDIF
+
+IF: ; preds = %LOOP
+ %17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
+ %18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
+ %19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
+ %20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %21 = insertelement <4 x float> undef, float %17, i32 0
+ %22 = insertelement <4 x float> %21, float %18, i32 1
+ %23 = insertelement <4 x float> %22, float %19, i32 2
+ %24 = insertelement <4 x float> %23, float %20, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
+ ret void
+
+ENDIF: ; preds = %LOOP
+ %25 = bitcast float %temp8.0 to i32
+ %26 = add i32 %25, 1
+ %27 = bitcast i32 %26 to float
+ br label %LOOP
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll
new file mode 100644
index 000000000000..6afd6772926b
--- /dev/null
+++ b/test/CodeGen/R600/schedule-if-2.ll
@@ -0,0 +1,94 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = fadd float 1.000000e+03, %1
+ %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = extractelement <4 x float> %3, i32 0
+ %5 = bitcast float %4 to i32
+ %6 = icmp eq i32 %5, 0
+ %7 = sext i1 %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = bitcast float %8 to i32
+ %10 = icmp ne i32 %9, 0
+ br i1 %10, label %IF, label %ELSE
+
+IF: ; preds = %main_body
+ %11 = call float @fabs(float %2)
+ %12 = fcmp ueq float %11, 0x7FF0000000000000
+ %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00
+ %14 = fsub float -0.000000e+00, %13
+ %15 = fptosi float %14 to i32
+ %16 = bitcast i32 %15 to float
+ %17 = bitcast float %16 to i32
+ %18 = icmp ne i32 %17, 0
+ %. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00
+ %19 = fcmp une float %2, %2
+ %20 = select i1 %19, float 1.000000e+00, float 0.000000e+00
+ %21 = fsub float -0.000000e+00, %20
+ %22 = fptosi float %21 to i32
+ %23 = bitcast i32 %22 to float
+ %24 = bitcast float %23 to i32
+ %25 = icmp ne i32 %24, 0
+ %temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00
+ %26 = bitcast float %. to i32
+ %27 = sitofp i32 %26 to float
+ %28 = bitcast float %temp8.0 to i32
+ %29 = sitofp i32 %28 to float
+ %30 = fcmp ugt float %2, 0.000000e+00
+ %31 = select i1 %30, float 1.000000e+00, float %2
+ %32 = fcmp uge float %31, 0.000000e+00
+ %33 = select i1 %32, float %31, float -1.000000e+00
+ %34 = fadd float %33, 1.000000e+00
+ %35 = fmul float %34, 5.000000e-01
+ br label %ENDIF
+
+ELSE: ; preds = %main_body
+ %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %37 = extractelement <4 x float> %36, i32 0
+ %38 = bitcast float %37 to i32
+ %39 = icmp eq i32 %38, 1
+ %40 = sext i1 %39 to i32
+ %41 = bitcast i32 %40 to float
+ %42 = bitcast float %41 to i32
+ %43 = icmp ne i32 %42, 0
+ br i1 %43, label %IF23, label %ENDIF
+
+ENDIF: ; preds = %IF23, %ELSE, %IF
+ %temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ]
+ %temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ]
+ %temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
+ %temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
+ %44 = insertelement <4 x float> undef, float %temp4.0, i32 0
+ %45 = insertelement <4 x float> %44, float %temp5.0, i32 1
+ %46 = insertelement <4 x float> %45, float %temp6.0, i32 2
+ %47 = insertelement <4 x float> %46, float %temp7.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0)
+ ret void
+
+IF23: ; preds = %ELSE
+ %48 = fcmp ult float 0.000000e+00, %2
+ %49 = select i1 %48, float 1.000000e+00, float 0.000000e+00
+ %50 = fsub float -0.000000e+00, %49
+ %51 = fptosi float %50 to i32
+ %52 = bitcast i32 %51 to float
+ %53 = bitcast float %52 to i32
+ %54 = icmp ne i32 %53, 0
+ %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
+ %55 = bitcast float %.28 to i32
+ %56 = sitofp i32 %55 to float
+ %57 = load <4 x float> addrspace(8)* null
+ %58 = extractelement <4 x float> %57, i32 0
+ %59 = fsub float -0.000000e+00, %58
+ %60 = fadd float %2, %59
+ br label %ENDIF
+}
+
+declare float @fabs(float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readonly }
diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll
new file mode 100644
index 000000000000..347d92fd6a0e
--- /dev/null
+++ b/test/CodeGen/R600/schedule-if.ll
@@ -0,0 +1,46 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = bitcast float %1 to i32
+ %3 = icmp eq i32 %2, 0
+ %4 = sext i1 %3 to i32
+ %5 = bitcast i32 %4 to float
+ %6 = bitcast float %5 to i32
+ %7 = icmp ne i32 %6, 0
+ br i1 %7, label %ENDIF, label %ELSE
+
+ELSE: ; preds = %main_body
+ %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %9 = extractelement <4 x float> %8, i32 0
+ %10 = bitcast float %9 to i32
+ %11 = icmp eq i32 %10, 1
+ %12 = sext i1 %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %13 to i32
+ %15 = icmp ne i32 %14, 0
+ br i1 %15, label %IF13, label %ENDIF
+
+ENDIF: ; preds = %IF13, %ELSE, %main_body
+ %temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp3.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %16 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
+ %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2
+ %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
+ ret void
+
+IF13: ; preds = %ELSE
+ %20 = load <4 x float> addrspace(8)* null
+ %21 = extractelement <4 x float> %20, i32 0
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fadd float 1.000000e+03, %22
+ br label %ENDIF
+}
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
new file mode 100644
index 000000000000..44b7c2f68002
--- /dev/null
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -0,0 +1,134 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = call float @llvm.R600.load.input(i32 4)
+ %1 = call float @llvm.R600.load.input(i32 5)
+ %2 = call float @llvm.R600.load.input(i32 6)
+ %3 = call float @llvm.R600.load.input(i32 7)
+ %4 = fcmp ult float %0, 0.000000e+00
+ %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
+ %6 = fsub float -0.000000e+00, %5
+ %7 = fptosi float %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = bitcast float %8 to i32
+ %10 = icmp ne i32 %9, 0
+ br i1 %10, label %LOOP, label %ENDIF
+
+ENDIF: ; preds = %ENDIF16, %LOOP, %main_body
+ %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ]
+ %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
+ %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
+ %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
+ %11 = load <4 x float> addrspace(9)* null
+ %12 = extractelement <4 x float> %11, i32 0
+ %13 = fmul float %12, %0
+ %14 = load <4 x float> addrspace(9)* null
+ %15 = extractelement <4 x float> %14, i32 1
+ %16 = fmul float %15, %0
+ %17 = load <4 x float> addrspace(9)* null
+ %18 = extractelement <4 x float> %17, i32 2
+ %19 = fmul float %18, %0
+ %20 = load <4 x float> addrspace(9)* null
+ %21 = extractelement <4 x float> %20, i32 3
+ %22 = fmul float %21, %0
+ %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %24 = extractelement <4 x float> %23, i32 0
+ %25 = fmul float %24, %1
+ %26 = fadd float %25, %13
+ %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %28 = extractelement <4 x float> %27, i32 1
+ %29 = fmul float %28, %1
+ %30 = fadd float %29, %16
+ %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %32 = extractelement <4 x float> %31, i32 2
+ %33 = fmul float %32, %1
+ %34 = fadd float %33, %19
+ %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %36 = extractelement <4 x float> %35, i32 3
+ %37 = fmul float %36, %1
+ %38 = fadd float %37, %22
+ %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %40 = extractelement <4 x float> %39, i32 0
+ %41 = fmul float %40, %2
+ %42 = fadd float %41, %26
+ %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %44 = extractelement <4 x float> %43, i32 1
+ %45 = fmul float %44, %2
+ %46 = fadd float %45, %30
+ %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %48 = extractelement <4 x float> %47, i32 2
+ %49 = fmul float %48, %2
+ %50 = fadd float %49, %34
+ %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %52 = extractelement <4 x float> %51, i32 3
+ %53 = fmul float %52, %2
+ %54 = fadd float %53, %38
+ %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %56 = extractelement <4 x float> %55, i32 0
+ %57 = fmul float %56, %3
+ %58 = fadd float %57, %42
+ %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %60 = extractelement <4 x float> %59, i32 1
+ %61 = fmul float %60, %3
+ %62 = fadd float %61, %46
+ %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %64 = extractelement <4 x float> %63, i32 2
+ %65 = fmul float %64, %3
+ %66 = fadd float %65, %50
+ %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %68 = extractelement <4 x float> %67, i32 3
+ %69 = fmul float %68, %3
+ %70 = fadd float %69, %54
+ %71 = insertelement <4 x float> undef, float %58, i32 0
+ %72 = insertelement <4 x float> %71, float %62, i32 1
+ %73 = insertelement <4 x float> %72, float %66, i32 2
+ %74 = insertelement <4 x float> %73, float %70, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1)
+ %75 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %76 = insertelement <4 x float> %75, float %temp1.0, i32 1
+ %77 = insertelement <4 x float> %76, float %temp2.0, i32 2
+ %78 = insertelement <4 x float> %77, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2)
+ ret void
+
+LOOP: ; preds = %main_body, %ENDIF19
+ %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ]
+ %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ]
+ %79 = fcmp uge float %temp4.0, %0
+ %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00
+ %81 = fsub float -0.000000e+00, %80
+ %82 = fptosi float %81 to i32
+ %83 = bitcast i32 %82 to float
+ %84 = bitcast float %83 to i32
+ %85 = icmp ne i32 %84, 0
+ br i1 %85, label %ENDIF, label %ENDIF16
+
+ENDIF16: ; preds = %LOOP
+ %86 = fcmp une float %2, %temp4.0
+ %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00
+ %88 = fsub float -0.000000e+00, %87
+ %89 = fptosi float %88 to i32
+ %90 = bitcast i32 %89 to float
+ %91 = bitcast float %90 to i32
+ %92 = icmp ne i32 %91, 0
+ br i1 %92, label %ENDIF, label %ENDIF19
+
+ENDIF19: ; preds = %ENDIF16
+ %93 = fadd float %temp.1, 1.000000e+00
+ %94 = fadd float %temp1.1, 0.000000e+00
+ %95 = fadd float %temp2.1, 0.000000e+00
+ %96 = fadd float %temp3.1, 0.000000e+00
+ %97 = fadd float %temp4.0, 1.000000e+00
+ br label %LOOP
+}
+
+declare float @llvm.R600.load.input(i32) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
new file mode 100644
index 000000000000..3556facfbab3
--- /dev/null
+++ b/test/CodeGen/R600/sdiv.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; The code generated by sdiv is long and complex and may frequently change.
+; The goal of this test is to make sure the ISel doesn't fail.
+;
+; This program was previously failing to compile when one of the selectcc
+; opcodes generated by the sdiv lowering was being legalized and optimized to:
+; selectcc Remainder -1, 0, -1, SETGT
+; This was fixed by adding an additional pattern in R600Instructions.td to
+; match this pattern with a CNDGE_INT.
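+;
+; As a rough illustration (not itself compiled here), that selectcc is the
+; IR-level equivalent of:
+;   %cmp = icmp sgt i32 %remainder, -1
+;   %sel = select i1 %cmp, i32 0, i32 -1
+; and CNDGE_INT computes dst = (src0 >= 0) ? src1 : src2, so it can implement
+; this select in a single instruction.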
+
+; CHECK: RETURN
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in
+ %den = load i32 addrspace(1) * %den_ptr
+ %result = sdiv i32 %num, %den
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
new file mode 100644
index 000000000000..359ca1e6f8ce
--- /dev/null
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Note: additional optimizations may cause this SETGT_INT to be replaced with
+; a CND* instruction.
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
+; Test a selectcc with i32 LHS/RHS and float True/False
+
+define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %0 = load i32 addrspace(1)* %in
+ %1 = icmp sge i32 %0, 0
+ %2 = select i1 %1, float 1.0, float 0.0
+ store float %2, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
new file mode 100644
index 000000000000..02d935390423
--- /dev/null
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @test_a
+; CHECK-NOT: CND
+; CHECK: SET{{[NEQGTL]+}}_DX10
+
+define void @test_a(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 0.000000e+00
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ %4 = bitcast i32 %3 to float
+ %5 = bitcast float %4 to i32
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %IF, label %ENDIF
+
+IF:
+ %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %7
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Same as test_a, but the branch labels are swapped to produce the inverse cc
+; for the icmp instruction
+
+; CHECK: @test_b
+; CHECK: SET{{[GTEQN]+}}_DX10
+; CHECK-NEXT: PRED_
+define void @test_b(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 0.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ %4 = bitcast i32 %3 to float
+ %5 = bitcast float %4 to i32
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %ENDIF, label %IF
+
+IF:
+ %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %7
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test a CND*_INT instruction with float true/false values
+; CHECK: @test_c
+; CHECK: CND{{[GTE]+}}_INT
+define void @test_c(float addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ %1 = select i1 %0, float 2.0, float 3.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll
new file mode 100644
index 000000000000..f0a0f512ba15
--- /dev/null
+++ b/test/CodeGen/R600/selectcc_cnde.ll
@@ -0,0 +1,11 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK-NOT: SETE
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}}
+define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %1 = load float addrspace(1)* %in
+ %2 = fcmp oeq float %1, 0.0
+ %3 = select i1 %2, float 1.0, float 2.0
+ store float %3, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll
new file mode 100644
index 000000000000..b38078e26db6
--- /dev/null
+++ b/test/CodeGen/R600/selectcc_cnde_int.ll
@@ -0,0 +1,11 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK-NOT: SETE_INT
+;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}}
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %1 = load i32 addrspace(1)* %in
+ %2 = icmp eq i32 %1, 0
+ %3 = select i1 %2, i32 1, i32 2
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
new file mode 100644
index 000000000000..54febcf0e68e
--- /dev/null
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests check that floating-point comparisons whose result is used by a
+; select to produce integer true (-1) and false (0) values are lowered to one
+; of the SET*_DX10 instructions.
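+;
+; For example (an illustrative idiom, not an additional test), the following
+; is expected to collapse into a single SETNE_DX10, because the _DX10 variants
+; produce all-ones (-1) or 0 rather than 1.0f or 0.0f:
+;   %cc  = fcmp une float %in, 5.0
+;   %res = select i1 %cc, i32 -1, i32 0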
+
+; CHECK: @fcmp_une_select_fptosi
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp une float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_une_select_i32
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp une float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ueq_select_fptosi
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ueq float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ueq_select_i32
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ueq float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ugt_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ugt float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ugt_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ugt float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_uge_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp uge float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_uge_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp uge float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ule_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ule_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ult_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ult_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.v4i32.ll
new file mode 100644
index 000000000000..0752f2e63dbf
--- /dev/null
+++ b/test/CodeGen/R600/setcc.v4i32.ll
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %result to <4 x i32>
+ store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
new file mode 100644
index 000000000000..5ab4b87d570c
--- /dev/null
+++ b/test/CodeGen/R600/seto.ll
@@ -0,0 +1,13 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
+
+define void @main(float %p) {
+main_body:
+ %c = fcmp oeq float %p, %p
+ %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
new file mode 100644
index 000000000000..320835576d41
--- /dev/null
+++ b/test/CodeGen/R600/setuo.ll
@@ -0,0 +1,13 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
+
+define void @main(float %p) {
+main_body:
+ %c = fcmp une float %p, %p
+ %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll
new file mode 100644
index 000000000000..b69e327bf6df
--- /dev/null
+++ b/test/CodeGen/R600/short-args.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @i8_arg
+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
+entry:
+ %0 = zext i8 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK: @i8_zext_arg
+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
+entry:
+ %0 = zext i8 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK: @i16_arg
+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
+entry:
+ %0 = zext i16 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK: @i16_zext_arg
+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
+entry:
+ %0 = zext i16 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll
new file mode 100644
index 000000000000..8b0d24445971
--- /dev/null
+++ b/test/CodeGen/R600/store.v4f32.ll
@@ -0,0 +1,9 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %1 = load <4 x float> addrspace(1) * %in
+ store <4 x float> %1, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll
new file mode 100644
index 000000000000..a659815ddeba
--- /dev/null
+++ b/test/CodeGen/R600/store.v4i32.ll
@@ -0,0 +1,9 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %1 = load <4 x i32> addrspace(1) * %in
+ store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.v4i32.ll
new file mode 100644
index 000000000000..47657a6be75e
--- /dev/null
+++ b/test/CodeGen/R600/udiv.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;The code generated by udiv is long and complex and may frequently change.
+;The goal of this test is to make sure the ISel doesn't fail when it gets
+;a v4i32 udiv
+;CHECK: RETURN
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = udiv <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
new file mode 100644
index 000000000000..b48c59151831
--- /dev/null
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests are for condition codes that are not supported directly by the
+; hardware.
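+;
+; For instance (illustration only), an unsupported signed less-than such as
+;   %0 = icmp slt i32 %in, 5
+; is expected to be emitted as the supported SETGT_INT with the operands
+; swapped (i.e. 5 > %in); the <= cases below additionally bump the integer
+; constant, so x <= 5 becomes 6 > x.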
+
+; CHECK: @slt
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+define void @slt(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp slt i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ult_i32
+; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp ult i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ult_float
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ult_float(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @olt
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @olt(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp olt float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @sle
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+define void @sle(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sle i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ule_i32
+; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp ule i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ule_float
+; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ule_float(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ole
+; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ole(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ole float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.v4i32.ll
new file mode 100644
index 000000000000..2e7388caa6ce
--- /dev/null
+++ b/test/CodeGen/R600/urem.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;The code generated by urem is long and complex and may frequently change.
+;The goal of this test is to make sure the ISel doesn't fail when it gets
+;a v4i32 urem
+;CHECK: RETURN
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = urem <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll
new file mode 100644
index 000000000000..8f62bc692908
--- /dev/null
+++ b/test/CodeGen/R600/vec4-expand.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fp_to_sint
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %value = load <4 x float> addrspace(1) * %in
+ %result = fptosi <4 x float> %value to <4 x i32>
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fp_to_uint
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %value = load <4 x float> addrspace(1) * %in
+ %result = fptoui <4 x float> %value to <4 x i32>
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @sint_to_fp
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %value = load <4 x i32> addrspace(1) * %in
+ %result = sitofp <4 x i32> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @uint_to_fp
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %value = load <4 x i32> addrspace(1) * %in
+ %result = uitofp <4 x i32> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}