diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
commit | 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch) | |
tree | 06099edc18d30894081a822b756f117cbe0b8207 /test/CodeGen/R600 | |
parent | 482e7bddf617ae804dc47133cb07eb4aa81e45de (diff) | |
download | src-4a16efa3e43e35f0cc9efe3a67f620f0017c3d36.tar.gz src-4a16efa3e43e35f0cc9efe3a67f620f0017c3d36.zip |
Vendor import of llvm trunk r178860:vendor/llvm/llvm-trunk-r178860
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=249259
svn path=/vendor/llvm/llvm-trunk-r178860/; revision=249260; tag=vendor/llvm/llvm-trunk-r178860
Diffstat (limited to 'test/CodeGen/R600')
65 files changed, 2933 insertions, 0 deletions
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll new file mode 100644 index 000000000000..114f9e74474f --- /dev/null +++ b/test/CodeGen/R600/128bit-kernel-args.ll @@ -0,0 +1,18 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @v4i32_kernel_arg +; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 + +define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @v4f32_kernel_arg +; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 +define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + store <4 x float> %in, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.v4i32.ll new file mode 100644 index 000000000000..ac4a87417bde --- /dev/null +++ b/test/CodeGen/R600/add.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = add <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll new file mode 100644 index 000000000000..afefcd9f78b0 --- /dev/null +++ b/test/CodeGen/R600/alu-split.ll @@ -0,0 +1,850 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ALU +;CHECK: ALU +;CHECK: ALU +;CHECK-NOT: ALU + +define void @main() #0 { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16) + %5 = extractelement <4 x float> %4, i32 0 + %6 = fcmp une float 0x4016F2B020000000, %5 + %7 = select i1 %6, float 1.000000e+00, float 0.000000e+00 + %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16) + %9 = extractelement <4 x float> %8, i32 1 + %10 = fcmp une float 0x401FDCC640000000, %9 + %11 = select i1 %10, float 1.000000e+00, float 0.000000e+00 + %12 = fsub float -0.000000e+00, %7 + %13 = fptosi float %12 to i32 + %14 = fsub float -0.000000e+00, %11 + %15 = fptosi float %14 to i32 + %16 = bitcast i32 %13 to float + %17 = bitcast i32 %15 to float + %18 = bitcast float %16 to i32 + %19 = bitcast float %17 to i32 + %20 = or i32 %18, %19 + %21 = bitcast i32 %20 to float + %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17) + %23 = extractelement <4 x float> %22, i32 0 + %24 = fcmp une float 0xC00574BC60000000, %23 + %25 = select i1 %24, float 1.000000e+00, float 0.000000e+00 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17) + %27 = extractelement <4 x float> %26, i32 1 + %28 = fcmp une float 0x40210068E0000000, %27 + %29 = select i1 %28, float 1.000000e+00, float 0.000000e+00 + %30 = fsub float -0.000000e+00, %25 + %31 = fptosi float %30 to i32 + %32 = fsub float -0.000000e+00, %29 + %33 = fptosi float %32 to i32 + %34 = bitcast i32 %31 to float + %35 = bitcast i32 %33 to float + %36 = bitcast float %34 to i32 + %37 = bitcast float %35 to i32 + %38 = or i32 %36, %37 + %39 = bitcast i32 %38 to float + %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18) + %41 = extractelement <4 x float> %40, i32 0 + %42 = fcmp une float 0xBFC9A6B500000000, %41 + %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00 + %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18) + %45 = extractelement <4 x float> %44, i32 1 + %46 = fcmp une float 0xC0119BDA60000000, %45 + %47 = select i1 %46, float 1.000000e+00, float 0.000000e+00 + %48 = fsub float -0.000000e+00, %43 + %49 = fptosi float %48 to i32 + %50 = fsub float -0.000000e+00, %47 + %51 = fptosi float %50 to i32 + %52 = bitcast i32 %49 to float + %53 = bitcast i32 %51 to float + %54 = bitcast float %52 to i32 + %55 = bitcast float %53 to i32 + %56 = or i32 %54, %55 + %57 = bitcast i32 %56 to float + %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19) + %59 = extractelement <4 x float> %58, i32 0 + %60 = fcmp une float 0xC02085D640000000, %59 + %61 = select i1 %60, float 1.000000e+00, float 0.000000e+00 + %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19) + %63 = extractelement <4 x float> %62, i32 1 + %64 = fcmp une float 0xBFD7C1BDA0000000, %63 + %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00 + %66 = fsub float -0.000000e+00, %61 + %67 = fptosi float %66 to i32 + %68 = fsub float -0.000000e+00, %65 + %69 = fptosi float %68 to i32 + %70 = bitcast i32 %67 to float + %71 = bitcast i32 %69 to float + %72 = bitcast float %70 to i32 + %73 = bitcast float %71 to i32 + %74 = or i32 %72, %73 + %75 = bitcast i32 %74 to float + %76 = insertelement <4 x float> undef, float %21, i32 0 + %77 = insertelement <4 x float> %76, float %39, i32 1 + %78 = insertelement <4 x float> %77, float %57, i32 2 + %79 = insertelement <4 x float> %78, float %75, i32 3 + %80 = insertelement <4 x float> undef, float %21, i32 0 + %81 = insertelement <4 x float> %80, float %39, i32 1 + %82 = insertelement <4 x float> %81, float %57, i32 2 + %83 = insertelement <4 x float> %82, float %75, i32 3 + %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83) + %85 = bitcast float %84 to i32 + %86 = icmp ne i32 %85, 0 + %87 = sext i1 %86 to i32 + %88 = bitcast i32 %87 to float + %89 = bitcast float %88 to i32 + %90 = xor i32 %89, -1 + %91 = bitcast i32 %90 to float + %92 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20) + %93 = extractelement <4 x float> %92, i32 0 + %94 = fcmp une float 0x401FDCC640000000, %93 + %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00 + %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20) + %97 = extractelement <4 x float> %96, i32 1 + %98 = fcmp une float 0xC00574BC60000000, %97 + %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00 + %100 = fsub float -0.000000e+00, %95 + %101 = fptosi float %100 to i32 + %102 = fsub float -0.000000e+00, %99 + %103 = fptosi float %102 to i32 + %104 = bitcast i32 %101 to float + %105 = bitcast i32 %103 to float + %106 = bitcast float %104 to i32 + %107 = bitcast float %105 to i32 + %108 = or i32 %106, %107 + %109 = bitcast i32 %108 to float + %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21) + %111 = extractelement <4 x float> %110, i32 0 + %112 = fcmp une float 0x40210068E0000000, %111 + %113 = select i1 %112, float 1.000000e+00, float 0.000000e+00 + %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21) + %115 = extractelement <4 x float> %114, i32 1 + %116 = fcmp une float 0xBFC9A6B500000000, %115 + %117 = select i1 %116, float 1.000000e+00, float 0.000000e+00 + %118 = fsub float -0.000000e+00, %113 + %119 = fptosi float %118 to i32 + %120 = fsub float -0.000000e+00, %117 + %121 = fptosi float %120 to i32 + %122 = bitcast i32 %119 to float + %123 = bitcast i32 %121 to float + %124 = bitcast float %122 to i32 + %125 = bitcast float %123 to i32 + %126 = or i32 %124, %125 + %127 = bitcast i32 %126 to float + %128 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22) + %129 = extractelement <4 x float> %128, i32 0 + %130 = fcmp une float 0xC0119BDA60000000, %129 + %131 = select i1 %130, float 1.000000e+00, float 0.000000e+00 + %132 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22) + %133 = extractelement <4 x float> %132, i32 1 + %134 = fcmp une float 0xC02085D640000000, %133 + %135 = select i1 %134, float 1.000000e+00, float 0.000000e+00 + %136 = fsub float -0.000000e+00, %131 + %137 = fptosi float %136 to i32 + %138 = fsub float -0.000000e+00, %135 + %139 = fptosi float %138 to i32 + %140 = bitcast i32 %137 to float + %141 = bitcast i32 %139 to float + %142 = bitcast float %140 to i32 + %143 = bitcast float %141 to i32 + %144 = or i32 %142, %143 + %145 = bitcast i32 %144 to float + %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23) + %147 = extractelement <4 x float> %146, i32 0 + %148 = fcmp une float 0xBFD7C1BDA0000000, %147 + %149 = select i1 %148, float 1.000000e+00, float 0.000000e+00 + %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23) + %151 = extractelement <4 x float> %150, i32 1 + %152 = fcmp une float 0x401E1D7DC0000000, %151 + %153 = select i1 %152, float 1.000000e+00, float 0.000000e+00 + %154 = fsub float -0.000000e+00, %149 + %155 = fptosi float %154 to i32 + %156 = fsub float -0.000000e+00, %153 + %157 = fptosi float %156 to i32 + %158 = bitcast i32 %155 to float + %159 = bitcast i32 %157 to float + %160 = bitcast float %158 to i32 + %161 = bitcast float %159 to i32 + %162 = or i32 %160, %161 + %163 = bitcast i32 %162 to float + %164 = insertelement <4 x float> undef, float %109, i32 0 + %165 = insertelement <4 x float> %164, float %127, i32 1 + %166 = insertelement <4 x float> %165, float %145, i32 2 + %167 = insertelement <4 x float> %166, float %163, i32 3 + %168 = insertelement <4 x float> undef, float %109, i32 0 + %169 = insertelement <4 x float> %168, float %127, i32 1 + %170 = insertelement <4 x float> %169, float %145, i32 2 + %171 = insertelement <4 x float> %170, float %163, i32 3 + %172 = call float @llvm.AMDGPU.dp4(<4 x float> %167, <4 x float> %171) + %173 = bitcast float %172 to i32 + %174 = icmp ne i32 %173, 0 + %175 = sext i1 %174 to i32 + %176 = bitcast i32 %175 to float + %177 = bitcast float %176 to i32 + %178 = xor i32 %177, -1 + %179 = bitcast i32 %178 to float + %180 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %181 = extractelement <4 x float> %180, i32 0 + %182 = fcmp une float 0x401FDCC640000000, %181 + %183 = select i1 %182, float 1.000000e+00, float 0.000000e+00 + %184 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %185 = extractelement <4 x float> %184, i32 1 + %186 = fcmp une float 0xC00574BC60000000, %185 + %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00 + %188 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %189 = extractelement <4 x float> %188, i32 2 + %190 = fcmp une float 0x40210068E0000000, %189 + %191 = select i1 %190, float 1.000000e+00, float 0.000000e+00 + %192 = fsub float -0.000000e+00, %183 + %193 = fptosi float %192 to i32 + %194 = fsub float -0.000000e+00, %187 + %195 = fptosi float %194 to i32 + %196 = fsub float -0.000000e+00, %191 + %197 = fptosi float %196 to i32 + %198 = bitcast i32 %193 to float + %199 = bitcast i32 %195 to float + %200 = bitcast i32 %197 to float + %201 = bitcast float %199 to i32 + %202 = bitcast float %200 to i32 + %203 = or i32 %201, %202 + %204 = bitcast i32 %203 to float + %205 = bitcast float %198 to i32 + %206 = bitcast float %204 to i32 + %207 = or i32 %205, %206 + %208 = bitcast i32 %207 to float + %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %210 = extractelement <4 x float> %209, i32 0 + %211 = fcmp une float 0xBFC9A6B500000000, %210 + %212 = select i1 %211, float 1.000000e+00, float 0.000000e+00 + %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %214 = extractelement <4 x float> %213, i32 1 + %215 = fcmp une float 0xC0119BDA60000000, %214 + %216 = select i1 %215, float 1.000000e+00, float 0.000000e+00 + %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %218 = extractelement <4 x float> %217, i32 2 + %219 = fcmp une float 0xC02085D640000000, %218 + %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00 + %221 = fsub float -0.000000e+00, %212 + %222 = fptosi float %221 to i32 + %223 = fsub float -0.000000e+00, %216 + %224 = fptosi float %223 to i32 + %225 = fsub float -0.000000e+00, %220 + %226 = fptosi float %225 to i32 + %227 = bitcast i32 %222 to float + %228 = bitcast i32 %224 to float + %229 = bitcast i32 %226 to float + %230 = bitcast float %228 to i32 + %231 = bitcast float %229 to i32 + %232 = or i32 %230, %231 + %233 = bitcast i32 %232 to float + %234 = bitcast float %227 to i32 + %235 = bitcast float %233 to i32 + %236 = or i32 %234, %235 + %237 = bitcast i32 %236 to float + %238 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %239 = extractelement <4 x float> %238, i32 0 + %240 = fcmp une float 0xBFD7C1BDA0000000, %239 + %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00 + %242 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %243 = extractelement <4 x float> %242, i32 1 + %244 = fcmp une float 0x401E1D7DC0000000, %243 + %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00 + %246 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %247 = extractelement <4 x float> %246, i32 2 + %248 = fcmp une float 0xC019893740000000, %247 + %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00 + %250 = fsub float -0.000000e+00, %241 + %251 = fptosi float %250 to i32 + %252 = fsub float -0.000000e+00, %245 + %253 = fptosi float %252 to i32 + %254 = fsub float -0.000000e+00, %249 + %255 = fptosi float %254 to i32 + %256 = bitcast i32 %251 to float + %257 = bitcast i32 %253 to float + %258 = bitcast i32 %255 to float + %259 = bitcast float %257 to i32 + %260 = bitcast float %258 to i32 + %261 = or i32 %259, %260 + %262 = bitcast i32 %261 to float + %263 = bitcast float %256 to i32 + %264 = bitcast float %262 to i32 + %265 = or i32 %263, %264 + %266 = bitcast i32 %265 to float + %267 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %268 = extractelement <4 x float> %267, i32 0 + %269 = fcmp une float 0x40220F0D80000000, %268 + %270 = select i1 %269, float 1.000000e+00, float 0.000000e+00 + %271 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %272 = extractelement <4 x float> %271, i32 1 + %273 = fcmp une float 0xC018E2EB20000000, %272 + %274 = select i1 %273, float 1.000000e+00, float 0.000000e+00 + %275 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %276 = extractelement <4 x float> %275, i32 2 + %277 = fcmp une float 0xBFEA8DB8C0000000, %276 + %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00 + %279 = fsub float -0.000000e+00, %270 + %280 = fptosi float %279 to i32 + %281 = fsub float -0.000000e+00, %274 + %282 = fptosi float %281 to i32 + %283 = fsub float -0.000000e+00, %278 + %284 = fptosi float %283 to i32 + %285 = bitcast i32 %280 to float + %286 = bitcast i32 %282 to float + %287 = bitcast i32 %284 to float + %288 = bitcast float %286 to i32 + %289 = bitcast float %287 to i32 + %290 = or i32 %288, %289 + %291 = bitcast i32 %290 to float + %292 = bitcast float %285 to i32 + %293 = bitcast float %291 to i32 + %294 = or i32 %292, %293 + %295 = bitcast i32 %294 to float + %296 = insertelement <4 x float> undef, float %208, i32 0 + %297 = insertelement <4 x float> %296, float %237, i32 1 + %298 = insertelement <4 x float> %297, float %266, i32 2 + %299 = insertelement <4 x float> %298, float %295, i32 3 + %300 = insertelement <4 x float> undef, float %208, i32 0 + %301 = insertelement <4 x float> %300, float %237, i32 1 + %302 = insertelement <4 x float> %301, float %266, i32 2 + %303 = insertelement <4 x float> %302, float %295, i32 3 + %304 = call float @llvm.AMDGPU.dp4(<4 x float> %299, <4 x float> %303) + %305 = bitcast float %304 to i32 + %306 = icmp ne i32 %305, 0 + %307 = sext i1 %306 to i32 + %308 = bitcast i32 %307 to float + %309 = bitcast float %308 to i32 + %310 = xor i32 %309, -1 + %311 = bitcast i32 %310 to float + %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) + %313 = extractelement <4 x float> %312, i32 0 + %314 = fcmp une float 0xC00574BC60000000, %313 + %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00 + %316 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) + %317 = extractelement <4 x float> %316, i32 1 + %318 = fcmp une float 0x40210068E0000000, %317 + %319 = select i1 %318, float 1.000000e+00, float 0.000000e+00 + %320 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) + %321 = extractelement <4 x float> %320, i32 2 + %322 = fcmp une float 0xBFC9A6B500000000, %321 + %323 = select i1 %322, float 1.000000e+00, float 0.000000e+00 + %324 = fsub float -0.000000e+00, %315 + %325 = fptosi float %324 to i32 + %326 = fsub float -0.000000e+00, %319 + %327 = fptosi float %326 to i32 + %328 = fsub float -0.000000e+00, %323 + %329 = fptosi float %328 to i32 + %330 = bitcast i32 %325 to float + %331 = bitcast i32 %327 to float + %332 = bitcast i32 %329 to float + %333 = bitcast float %331 to i32 + %334 = bitcast float %332 to i32 + %335 = or i32 %333, %334 + %336 = bitcast i32 %335 to float + %337 = bitcast float %330 to i32 + %338 = bitcast float %336 to i32 + %339 = or i32 %337, %338 + %340 = bitcast i32 %339 to float + %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13) + %342 = extractelement <4 x float> %341, i32 0 + %343 = fcmp une float 0xC0119BDA60000000, %342 + %344 = select i1 %343, float 1.000000e+00, float 0.000000e+00 + %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13) + %346 = extractelement <4 x float> %345, i32 1 + %347 = fcmp une float 0xC02085D640000000, %346 + %348 = select i1 %347, float 1.000000e+00, float 0.000000e+00 + %349 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13) + %350 = extractelement <4 x float> %349, i32 2 + %351 = fcmp une float 0xBFD7C1BDA0000000, %350 + %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00 + %353 = fsub float -0.000000e+00, %344 + %354 = fptosi float %353 to i32 + %355 = fsub float -0.000000e+00, %348 + %356 = fptosi float %355 to i32 + %357 = fsub float -0.000000e+00, %352 + %358 = fptosi float %357 to i32 + %359 = bitcast i32 %354 to float + %360 = bitcast i32 %356 to float + %361 = bitcast i32 %358 to float + %362 = bitcast float %360 to i32 + %363 = bitcast float %361 to i32 + %364 = or i32 %362, %363 + %365 = bitcast i32 %364 to float + %366 = bitcast float %359 to i32 + %367 = bitcast float %365 to i32 + %368 = or i32 %366, %367 + %369 = bitcast i32 %368 to float + %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %371 = extractelement <4 x float> %370, i32 0 + %372 = fcmp une float 0x401E1D7DC0000000, %371 + %373 = select i1 %372, float 1.000000e+00, float 0.000000e+00 + %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %375 = extractelement <4 x float> %374, i32 1 + %376 = fcmp une float 0xC019893740000000, %375 + %377 = select i1 %376, float 1.000000e+00, float 0.000000e+00 + %378 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %379 = extractelement <4 x float> %378, i32 2 + %380 = fcmp une float 0x40220F0D80000000, %379 + %381 = select i1 %380, float 1.000000e+00, float 0.000000e+00 + %382 = fsub float -0.000000e+00, %373 + %383 = fptosi float %382 to i32 + %384 = fsub float -0.000000e+00, %377 + %385 = fptosi float %384 to i32 + %386 = fsub float -0.000000e+00, %381 + %387 = fptosi float %386 to i32 + %388 = bitcast i32 %383 to float + %389 = bitcast i32 %385 to float + %390 = bitcast i32 %387 to float + %391 = bitcast float %389 to i32 + %392 = bitcast float %390 to i32 + %393 = or i32 %391, %392 + %394 = bitcast i32 %393 to float + %395 = bitcast float %388 to i32 + %396 = bitcast float %394 to i32 + %397 = or i32 %395, %396 + %398 = bitcast i32 %397 to float + %399 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15) + %400 = extractelement <4 x float> %399, i32 0 + %401 = fcmp une float 0xC018E2EB20000000, %400 + %402 = select i1 %401, float 1.000000e+00, float 0.000000e+00 + %403 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15) + %404 = extractelement <4 x float> %403, i32 1 + %405 = fcmp une float 0xBFEA8DB8C0000000, %404 + %406 = select i1 %405, float 1.000000e+00, float 0.000000e+00 + %407 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15) + %408 = extractelement <4 x float> %407, i32 2 + %409 = fcmp une float 0x4015236E20000000, %408 + %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00 + %411 = fsub float -0.000000e+00, %402 + %412 = fptosi float %411 to i32 + %413 = fsub float -0.000000e+00, %406 + %414 = fptosi float %413 to i32 + %415 = fsub float -0.000000e+00, %410 + %416 = fptosi float %415 to i32 + %417 = bitcast i32 %412 to float + %418 = bitcast i32 %414 to float + %419 = bitcast i32 %416 to float + %420 = bitcast float %418 to i32 + %421 = bitcast float %419 to i32 + %422 = or i32 %420, %421 + %423 = bitcast i32 %422 to float + %424 = bitcast float %417 to i32 + %425 = bitcast float %423 to i32 + %426 = or i32 %424, %425 + %427 = bitcast i32 %426 to float + %428 = insertelement <4 x float> undef, float %340, i32 0 + %429 = insertelement <4 x float> %428, float %369, i32 1 + %430 = insertelement <4 x float> %429, float %398, i32 2 + %431 = insertelement <4 x float> %430, float %427, i32 3 + %432 = insertelement <4 x float> undef, float %340, i32 0 + %433 = insertelement <4 x float> %432, float %369, i32 1 + %434 = insertelement <4 x float> %433, float %398, i32 2 + %435 = insertelement <4 x float> %434, float %427, i32 3 + %436 = call float @llvm.AMDGPU.dp4(<4 x float> %431, <4 x float> %435) + %437 = bitcast float %436 to i32 + %438 = icmp ne i32 %437, 0 + %439 = sext i1 %438 to i32 + %440 = bitcast i32 %439 to float + %441 = bitcast float %440 to i32 + %442 = xor i32 %441, -1 + %443 = bitcast i32 %442 to float + %444 = load <4 x float> addrspace(8)* null + %445 = extractelement <4 x float> %444, i32 0 + %446 = fcmp une float 0xC00574BC60000000, %445 + %447 = select i1 %446, float 1.000000e+00, float 0.000000e+00 + %448 = load <4 x float> addrspace(8)* null + %449 = extractelement <4 x float> %448, i32 1 + %450 = fcmp une float 0x40210068E0000000, %449 + %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00 + %452 = load <4 x float> addrspace(8)* null + %453 = extractelement <4 x float> %452, i32 2 + %454 = fcmp une float 0xBFC9A6B500000000, %453 + %455 = select i1 %454, float 1.000000e+00, float 0.000000e+00 + %456 = load <4 x float> addrspace(8)* null + %457 = extractelement <4 x float> %456, i32 3 + %458 = fcmp une float 0xC0119BDA60000000, %457 + %459 = select i1 %458, float 1.000000e+00, float 0.000000e+00 + %460 = fsub float -0.000000e+00, %447 + %461 = fptosi float %460 to i32 + %462 = fsub float -0.000000e+00, %451 + %463 = fptosi float %462 to i32 + %464 = fsub float -0.000000e+00, %455 + %465 = fptosi float %464 to i32 + %466 = fsub float -0.000000e+00, %459 + %467 = fptosi float %466 to i32 + %468 = bitcast i32 %461 to float + %469 = bitcast i32 %463 to float + %470 = bitcast i32 %465 to float + %471 = bitcast i32 %467 to float + %472 = bitcast float %468 to i32 + %473 = bitcast float %469 to i32 + %474 = or i32 %472, %473 + %475 = bitcast i32 %474 to float + %476 = bitcast float %470 to i32 + %477 = bitcast float %471 to i32 + %478 = or i32 %476, %477 + %479 = bitcast i32 %478 to float + %480 = bitcast float %475 to i32 + %481 = bitcast float %479 to i32 + %482 = or i32 %480, %481 + %483 = bitcast i32 %482 to float + %484 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %485 = extractelement <4 x float> %484, i32 0 + %486 = fcmp une float 0xC02085D640000000, %485 + %487 = select i1 %486, float 1.000000e+00, float 0.000000e+00 + %488 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %489 = extractelement <4 x float> %488, i32 1 + %490 = fcmp une float 0xBFD7C1BDA0000000, %489 + %491 = select i1 %490, float 1.000000e+00, float 0.000000e+00 + %492 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %493 = extractelement <4 x float> %492, i32 2 + %494 = fcmp une float 0x401E1D7DC0000000, %493 + %495 = select i1 %494, float 1.000000e+00, float 0.000000e+00 + %496 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %497 = extractelement <4 x float> %496, i32 3 + %498 = fcmp une float 0xC019893740000000, %497 + %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00 + %500 = fsub float -0.000000e+00, %487 + %501 = fptosi float %500 to i32 + %502 = fsub float -0.000000e+00, %491 + %503 = fptosi float %502 to i32 + %504 = fsub float -0.000000e+00, %495 + %505 = fptosi float %504 to i32 + %506 = fsub float -0.000000e+00, %499 + %507 = fptosi float %506 to i32 + %508 = bitcast i32 %501 to float + %509 = bitcast i32 %503 to float + %510 = bitcast i32 %505 to float + %511 = bitcast i32 %507 to float + %512 = bitcast float %508 to i32 + %513 = bitcast float %509 to i32 + %514 = or i32 %512, %513 + %515 = bitcast i32 %514 to float + %516 = bitcast float %510 to i32 + %517 = bitcast float %511 to i32 + %518 = or i32 %516, %517 + %519 = bitcast i32 %518 to float + %520 = bitcast float %515 to i32 + %521 = bitcast float %519 to i32 + %522 = or i32 %520, %521 + %523 = bitcast i32 %522 to float + %524 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %525 = extractelement <4 x float> %524, i32 0 + %526 = fcmp une float 0x40220F0D80000000, %525 + %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00 + %528 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %529 = extractelement <4 x float> %528, i32 1 + %530 = fcmp une float 0xC018E2EB20000000, %529 + %531 = select i1 %530, float 1.000000e+00, float 0.000000e+00 + %532 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %533 = extractelement <4 x float> %532, i32 2 + %534 = fcmp une float 0xBFEA8DB8C0000000, %533 + %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00 + %536 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %537 = extractelement <4 x float> %536, i32 3 + %538 = fcmp une float 0x4015236E20000000, %537 + %539 = select i1 %538, float 1.000000e+00, float 0.000000e+00 + %540 = fsub float -0.000000e+00, %527 + %541 = fptosi float %540 to i32 + %542 = fsub float -0.000000e+00, %531 + %543 = fptosi float %542 to i32 + %544 = fsub float -0.000000e+00, %535 + %545 = fptosi float %544 to i32 + %546 = fsub float -0.000000e+00, %539 + %547 = fptosi float %546 to i32 + %548 = bitcast i32 %541 to float + %549 = bitcast i32 %543 to float + %550 = bitcast i32 %545 to float + %551 = bitcast i32 %547 to float + %552 = bitcast float %548 to i32 + %553 = bitcast float %549 to i32 + %554 = or i32 %552, %553 + %555 = bitcast i32 %554 to float + %556 = bitcast float %550 to i32 + %557 = bitcast float %551 to i32 + %558 = or i32 %556, %557 + %559 = bitcast i32 %558 to float + %560 = bitcast float %555 to i32 + %561 = bitcast float %559 to i32 + %562 = or i32 %560, %561 + %563 = bitcast i32 %562 to float + %564 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %565 = extractelement <4 x float> %564, i32 0 + %566 = fcmp une float 0x4016ED5D00000000, %565 + %567 = select i1 %566, float 1.000000e+00, float 0.000000e+00 + %568 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %569 = extractelement <4 x float> %568, i32 1 + %570 = fcmp une float 0x402332FEC0000000, %569 + %571 = select i1 %570, float 1.000000e+00, float 0.000000e+00 + %572 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %573 = extractelement <4 x float> %572, i32 2 + %574 = fcmp une float 0xC01484B5E0000000, %573 + %575 = select i1 %574, float 1.000000e+00, float 0.000000e+00 + %576 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %577 = extractelement <4 x float> %576, i32 3 + %578 = fcmp une float 0x400179A6C0000000, %577 + %579 = select i1 %578, float 1.000000e+00, float 0.000000e+00 + %580 = fsub float -0.000000e+00, %567 + %581 = fptosi float %580 to i32 + %582 = fsub float -0.000000e+00, %571 + %583 = fptosi float %582 to i32 + %584 = fsub float -0.000000e+00, %575 + %585 = fptosi float %584 to i32 + %586 = fsub float -0.000000e+00, %579 + %587 = fptosi float %586 to i32 + %588 = bitcast i32 %581 to float + %589 = bitcast i32 %583 to float + %590 = bitcast i32 %585 to float + %591 = bitcast i32 %587 to float + %592 = bitcast float %588 to i32 + %593 = bitcast float %589 to i32 + %594 = or i32 %592, %593 + %595 = bitcast i32 %594 to float + %596 = bitcast float %590 to i32 + %597 = bitcast float %591 to i32 + %598 = or i32 %596, %597 + %599 = bitcast i32 %598 to float + %600 = bitcast float %595 to i32 + %601 = bitcast float %599 to i32 + %602 = or i32 %600, %601 + %603 = bitcast i32 %602 to float + %604 = insertelement <4 x float> undef, float %483, i32 0 + %605 = insertelement <4 x float> %604, float %523, i32 1 + %606 = insertelement <4 x float> %605, float %563, i32 2 + %607 = insertelement <4 x float> %606, float %603, i32 3 + %608 = insertelement <4 x float> undef, float %483, i32 0 + %609 = insertelement <4 x float> %608, float %523, i32 1 + %610 = insertelement <4 x float> %609, float %563, i32 2 + %611 = insertelement <4 x float> %610, float %603, i32 3 + %612 = call float @llvm.AMDGPU.dp4(<4 x float> %607, <4 x float> %611) + %613 = bitcast float %612 to i32 + %614 = icmp ne i32 %613, 0 + %615 = sext i1 %614 to i32 + %616 = bitcast i32 %615 to float + %617 = bitcast float %616 to i32 + %618 = xor i32 %617, -1 + %619 = bitcast i32 %618 to float + %620 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %621 = extractelement <4 x float> %620, i32 0 + %622 = fcmp une float 0x40210068E0000000, %621 + %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00 + %624 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %625 = extractelement <4 x float> %624, i32 1 + %626 = fcmp une float 0xBFC9A6B500000000, %625 + %627 = select i1 %626, float 1.000000e+00, float 0.000000e+00 + %628 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %629 = extractelement <4 x float> %628, i32 2 + %630 = fcmp une float 0xC0119BDA60000000, %629 + %631 = select i1 %630, float 1.000000e+00, float 0.000000e+00 + %632 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %633 = extractelement <4 x float> %632, i32 3 + %634 = fcmp une float 0xC02085D640000000, %633 + %635 = select i1 %634, float 1.000000e+00, float 0.000000e+00 + %636 = fsub float -0.000000e+00, %623 + %637 = fptosi float %636 to i32 + %638 = fsub float -0.000000e+00, %627 + %639 = fptosi float %638 to i32 + %640 = fsub float -0.000000e+00, %631 + %641 = fptosi float %640 to i32 + %642 = fsub float -0.000000e+00, %635 + %643 = fptosi float %642 to i32 + %644 = bitcast i32 %637 to float + %645 = bitcast i32 %639 to float + %646 = bitcast i32 %641 to float + %647 = bitcast i32 %643 to float + %648 = bitcast float %644 to i32 + %649 = bitcast float %645 to i32 + %650 = or i32 %648, %649 + %651 = bitcast i32 %650 to float + %652 = bitcast float %646 to i32 + %653 = bitcast float %647 to i32 + %654 = or i32 %652, %653 + %655 = bitcast i32 %654 to float + %656 = bitcast float %651 to i32 + %657 = bitcast float %655 to i32 + %658 = or i32 %656, %657 + %659 = bitcast i32 %658 to float + %660 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %661 = extractelement <4 x float> %660, i32 0 + %662 = fcmp une float 0xBFD7C1BDA0000000, %661 + %663 = select i1 %662, float 1.000000e+00, float 0.000000e+00 + %664 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %665 = extractelement <4 x float> %664, i32 1 + %666 = fcmp une float 0x401E1D7DC0000000, %665 + %667 = select i1 %666, float 1.000000e+00, float 0.000000e+00 + %668 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %669 = extractelement <4 x float> %668, i32 2 + %670 = fcmp une float 0xC019893740000000, %669 + %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00 + %672 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %673 = extractelement <4 x float> %672, i32 3 + %674 = fcmp une float 0x40220F0D80000000, %673 + %675 = select i1 %674, float 1.000000e+00, float 0.000000e+00 + %676 = fsub float -0.000000e+00, %663 + %677 = fptosi float %676 to i32 + %678 = fsub float -0.000000e+00, %667 + %679 = fptosi float %678 to i32 + %680 = fsub float -0.000000e+00, %671 + %681 = fptosi float %680 to i32 + %682 = fsub float -0.000000e+00, %675 + %683 = fptosi float %682 to i32 + %684 = bitcast i32 %677 to float + %685 = bitcast i32 %679 to float + %686 = bitcast i32 %681 to float + %687 = bitcast i32 %683 to float + %688 = bitcast float %684 to i32 + %689 = bitcast float %685 to i32 + %690 = or i32 %688, %689 + %691 = bitcast i32 %690 to float + %692 = bitcast float %686 to i32 + %693 = bitcast float %687 to i32 + %694 = or i32 %692, %693 + %695 = bitcast i32 %694 to float + %696 = bitcast float %691 to i32 + %697 = bitcast float %695 to i32 + %698 = or i32 %696, %697 + %699 = bitcast i32 %698 to float + %700 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %701 = extractelement <4 x float> %700, i32 0 + %702 = fcmp une float 0xC018E2EB20000000, %701 + %703 = select i1 %702, float 1.000000e+00, float 0.000000e+00 + %704 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %705 = extractelement <4 x float> %704, i32 1 + %706 = fcmp une float 0xBFEA8DB8C0000000, %705 + %707 = select i1 %706, float 1.000000e+00, float 0.000000e+00 + %708 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %709 = extractelement <4 x float> %708, i32 2 + %710 = fcmp une float 0x4015236E20000000, %709 + %711 = select i1 %710, float 1.000000e+00, float 0.000000e+00 + %712 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %713 = extractelement <4 x float> %712, i32 3 + %714 = fcmp une float 0x4016ED5D00000000, %713 + %715 = select i1 %714, float 1.000000e+00, float 0.000000e+00 + %716 = fsub float -0.000000e+00, %703 + %717 = fptosi float %716 to i32 + %718 = fsub float -0.000000e+00, %707 + %719 = fptosi float %718 to i32 + %720 = fsub float -0.000000e+00, %711 + %721 = fptosi float %720 to i32 + %722 = fsub float -0.000000e+00, %715 + %723 = fptosi float %722 to i32 + %724 = bitcast i32 %717 to float + %725 = bitcast i32 %719 to float + %726 = bitcast i32 %721 to float + %727 = bitcast i32 %723 to float + %728 = bitcast float %724 to i32 + %729 = bitcast float %725 to i32 + %730 = or i32 %728, %729 + %731 = bitcast i32 %730 to float + %732 = bitcast float %726 to i32 + %733 = bitcast float %727 to i32 + %734 = or i32 %732, %733 + %735 = bitcast i32 %734 to float + %736 = bitcast float %731 to i32 + %737 = bitcast float %735 to i32 + %738 = or i32 %736, %737 + %739 = bitcast i32 %738 to float + %740 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %741 = extractelement <4 x float> %740, i32 0 + %742 = fcmp une float 0x402332FEC0000000, %741 + %743 = select i1 %742, float 1.000000e+00, float 0.000000e+00 + %744 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %745 = extractelement <4 x float> %744, i32 1 + %746 = fcmp une float 0xC01484B5E0000000, %745 + %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00 + %748 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %749 = extractelement <4 x float> %748, i32 2 + %750 = fcmp une float 0x400179A6C0000000, %749 + %751 = select i1 %750, float 1.000000e+00, float 0.000000e+00 + %752 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %753 = extractelement <4 x float> %752, i32 3 + %754 = fcmp une float 0xBFEE752540000000, %753 + %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00 + %756 = fsub float -0.000000e+00, %743 + %757 = fptosi float %756 to i32 + %758 = fsub float -0.000000e+00, %747 + %759 = fptosi float %758 to i32 + %760 = fsub float -0.000000e+00, %751 + %761 = fptosi float %760 to i32 + %762 = fsub float -0.000000e+00, %755 + %763 = fptosi float %762 to i32 + %764 = bitcast i32 %757 to float + %765 = bitcast i32 %759 to float + %766 = bitcast i32 %761 to float + %767 = bitcast i32 %763 to float + %768 = bitcast float %764 to i32 + %769 = bitcast float %765 to i32 + %770 = or i32 %768, %769 + %771 = bitcast i32 %770 to float + %772 = bitcast float %766 to i32 + %773 = bitcast float %767 to i32 + %774 = or i32 %772, %773 + %775 = bitcast i32 %774 to float + %776 = bitcast float %771 to i32 + %777 = bitcast float %775 to i32 + %778 = or i32 %776, %777 + %779 = bitcast i32 %778 to float + %780 = insertelement <4 x float> undef, float %659, i32 0 + %781 = insertelement <4 x float> %780, float %699, i32 1 + %782 = insertelement <4 x float> %781, float %739, i32 2 + %783 = insertelement <4 x float> %782, float %779, i32 3 + %784 = insertelement <4 x float> undef, float %659, i32 0 + %785 = insertelement <4 x float> %784, float %699, i32 1 + %786 = insertelement <4 x float> %785, float %739, i32 2 + %787 = insertelement <4 x float> %786, float %779, i32 3 + %788 = call float @llvm.AMDGPU.dp4(<4 x float> %783, <4 x float> %787) + %789 = bitcast float %788 to i32 + %790 = icmp ne i32 %789, 0 + %791 = sext i1 %790 to i32 + %792 = bitcast i32 %791 to float + %793 = bitcast float %792 to i32 + %794 = xor i32 %793, -1 + %795 = bitcast i32 %794 to float + %796 = bitcast float %91 to i32 + %797 = bitcast float %179 to i32 + %798 = and i32 %796, %797 + %799 = bitcast i32 %798 to float + %800 = bitcast float %311 to i32 + %801 = bitcast float %443 to i32 + %802 = and i32 %800, %801 + %803 = bitcast i32 %802 to float + %804 = bitcast float %799 to i32 + %805 = bitcast float %803 to i32 + %806 = and i32 %804, %805 + %807 = bitcast i32 %806 to float + %808 = bitcast float %619 to i32 + %809 = bitcast float %795 to i32 + %810 = and i32 %808, %809 + %811 = bitcast i32 %810 to float + %812 = bitcast float %807 to i32 + %813 = bitcast float %811 to i32 + %814 = and i32 %812, %813 + %815 = bitcast i32 %814 to float + %816 = bitcast float %815 to i32 + %817 = icmp ne i32 %816, 0 + %. = select i1 %817, float 1.000000e+00, float 0.000000e+00 + %.32 = select i1 %817, float 0.000000e+00, float 1.000000e+00 + %818 = insertelement <4 x float> undef, float %0, i32 0 + %819 = insertelement <4 x float> %818, float %1, i32 1 + %820 = insertelement <4 x float> %819, float %2, i32 2 + %821 = insertelement <4 x float> %820, float %3, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %821, i32 60, i32 1) + %822 = insertelement <4 x float> undef, float %.32, i32 0 + %823 = insertelement <4 x float> %822, float %., i32 1 + %824 = insertelement <4 x float> %823, float 0.000000e+00, i32 2 + %825 = insertelement <4 x float> %824, float 1.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %825, i32 0, i32 2) + ret void +} + +declare float @llvm.R600.load.input(i32) #1 + +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { readnone } diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.v4i32.ll new file mode 100644 index 000000000000..662085e2d673 --- /dev/null +++ b/test/CodeGen/R600/and.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = and <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll new file mode 100644 index 000000000000..fd958b365961 --- /dev/null +++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll @@ -0,0 +1,36 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test is for a bug in +; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where +; the wrong type was being passed to +; TargetLowering::getOperationAction() when checking the legality of +; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes. + + +; CHECK: @sint +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %sint = load i32 addrspace(1) * %in + %conv = sitofp i32 %sint to float + %0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} + +;CHECK: @uint +;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %uint = load i32 addrspace(1) * %in + %conv = uitofp i32 %uint to float + %0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll new file mode 100644 index 000000000000..09baee7a1dcd --- /dev/null +++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; PRED_SET* instructions must be tied to any instruction that uses their +; result. This tests that there are no instructions between the PRED_SET* +; and the PREDICATE_BREAK in this loop. + +; CHECK: @loop_ge +; CHECK: LOOP_START_DX10 +; CHECK: PRED_SET +; CHECK-NEXT: JUMP +; CHECK-NEXT: LOOP_BREAK +define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { +entry: + %cmp5 = icmp sgt i32 %iterations, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ] + %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %i.07 = add nsw i32 %i.07.in, -1 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06 + store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4 + %add = add nsw i32 %ai.06, 1 + %exitcond = icmp eq i32 %add, %iterations + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll new file mode 100644 index 000000000000..0407533eaa5f --- /dev/null +++ b/test/CodeGen/R600/fabs.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @fabs( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @fabs(float ) readnone diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll new file mode 100644 index 000000000000..d7d1b6572c41 --- /dev/null +++ b/test/CodeGen/R600/fadd.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fadd float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll new file mode 100644 index 000000000000..85dbfd52cbb3 --- /dev/null +++ b/test/CodeGen/R600/fadd.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fadd <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll new file mode 100644 index 000000000000..a94cfb5cf2fe --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;Not checking arguments 2 and 3 to CNDE, because they may change between +;registers and literal.x depending on what the optimizer does. +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 2, i32 3 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll new file mode 100644 index 000000000000..55aba0d72d39 --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the +; chance to optimize the fcmp + select instructions to SET* was missed +; due to the fact that the operands to fcmp and select had different types + +; CHECK: SET{{[A-Z]+}}_DX10 + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 -1, i32 0 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll new file mode 100644 index 000000000000..37f621d23958 --- /dev/null +++ b/test/CodeGen/R600/fcmp.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fcmp_sext +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1 + %1 = load float addrspace(1)* %arrayidx1 + %cmp = fcmp oeq float %0, %1 + %sext = sext i1 %cmp to i32 + store i32 %sext, i32 addrspace(1)* %out + ret void +} + +; This test checks that a setcc node with f32 operands is lowered to a +; SET*_DX10 instruction. Previously we were lowering this to: +; SET* + FP_TO_SINT + +; CHECK: @fcmp_br +; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0 + +define void @fcmp_br(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp oeq float %in, 5.0 + br i1 %0, label %IF, label %ENDIF + +IF: + %1 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %1 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll new file mode 100644 index 000000000000..79e677f541f5 --- /dev/null +++ b/test/CodeGen/R600/fdiv.v4f32.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fdiv <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll new file mode 100644 index 000000000000..845330f28419 --- /dev/null +++ b/test/CodeGen/R600/floor.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @floor(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @floor(float) readonly diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll new file mode 100644 index 000000000000..a3d4d0ff0db7 --- /dev/null +++ b/test/CodeGen/R600/fmad.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.R600.load.input(i32 2) + %r3 = fmul float %r0, %r1 + %r4 = fadd float %r3, %r2 + call void @llvm.AMDGPU.store.output(float %r4, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @fabs(float ) readnone diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll new file mode 100644 index 000000000000..3708f0b9eed2 --- /dev/null +++ b/test/CodeGen/R600/fmax.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll new file mode 100644 index 000000000000..19d59ab3061e --- /dev/null +++ b/test/CodeGen/R600/fmin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r1, float %r0 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll new file mode 100644 index 000000000000..eb1d523c0bb4 --- /dev/null +++ b/test/CodeGen/R600/fmul.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fmul float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll new file mode 100644 index 000000000000..6d44a0c5c782 --- /dev/null +++ b/test/CodeGen/R600/fmul.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fmul <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll new file mode 100644 index 000000000000..591aa52676a4 --- /dev/null +++ b/test/CodeGen/R600/fsub.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fsub float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll new file mode 100644 index 000000000000..612a57e4b609 --- /dev/null +++ b/test/CodeGen/R600/fsub.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fsub <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8_to_double_to_float.ll new file mode 100644 index 000000000000..39f33227fa4b --- /dev/null +++ b/test/CodeGen/R600/i8_to_double_to_float.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = uitofp i8 %1 to double + %3 = fptrunc double %2 to float + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll new file mode 100644 index 000000000000..71705a64f50e --- /dev/null +++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll @@ -0,0 +1,18 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;Test that a select with reversed True/False values is correctly lowered +;to a SETNE_INT. There should only be one SETNE_INT instruction. + +;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NOT: SETNE_INT + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1 + %1 = load i32 addrspace(1)* %arrayidx1 + %cmp = icmp eq i32 %0, %1 + %value = select i1 %cmp, i32 0, i32 -1 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/jump_address.ll b/test/CodeGen/R600/jump_address.ll new file mode 100644 index 000000000000..cd35bffb1304 --- /dev/null +++ b/test/CodeGen/R600/jump_address.ll @@ -0,0 +1,50 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: JUMP @4 + +define void @main() #0 { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %1 = extractelement <4 x float> %0, i32 0 + %2 = bitcast float %1 to i32 + %3 = icmp eq i32 %2, 0 + %4 = sext i1 %3 to i32 + %5 = bitcast i32 %4 to float + %6 = bitcast float %5 to i32 + %7 = icmp ne i32 %6, 0 + br i1 %7, label %ENDIF, label %ELSE + +ELSE: ; preds = %main_body + %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %9 = extractelement <4 x float> %8, i32 0 + %10 = bitcast float %9 to i32 + %11 = icmp eq i32 %10, 1 + %12 = sext i1 %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %13 to i32 + %15 = icmp ne i32 %14, 0 + br i1 %15, label %IF13, label %ENDIF + +ENDIF: ; preds = %IF13, %ELSE, %main_body + %temp.0 = phi float [ 0xFFF8000000000000, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp2.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %temp3.0 = phi float [ 5.000000e-01, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %16 = insertelement <4 x float> undef, float %temp.0, i32 0 + %17 = insertelement <4 x float> %16, float %temp1.0, i32 1 + %18 = insertelement <4 x float> %17, float %temp2.0, i32 2 + %19 = insertelement <4 x float> %18, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0) + ret void + +IF13: ; preds = %ELSE + %20 = load <4 x float> addrspace(8)* null + %21 = extractelement <4 x float> %20, i32 0 + %22 = fsub float -0.000000e+00, %21 + %23 = fadd float 0xFFF8000000000000, %22 + br label %ENDIF +} + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll new file mode 100644 index 000000000000..3d70e4bd54aa --- /dev/null +++ b/test/CodeGen/R600/kcache-fold.ll @@ -0,0 +1,100 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @main1 +; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}} +define void @main1() { +main_body: + %0 = load <4 x float> addrspace(8)* null + %1 = extractelement <4 x float> %0, i32 0 + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %3 = extractelement <4 x float> %2, i32 0 + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %5 = extractelement <4 x float> %4, i32 0 + %6 = fcmp ult float %1, 0.000000e+00 + %7 = select i1 %6, float %3, float %5 + %8 = load <4 x float> addrspace(8)* null + %9 = extractelement <4 x float> %8, i32 1 + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %11 = extractelement <4 x float> %10, i32 1 + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %13 = extractelement <4 x float> %12, i32 1 + %14 = fcmp ult float %9, 0.000000e+00 + %15 = select i1 %14, float %11, float %13 + %16 = load <4 x float> addrspace(8)* null + %17 = extractelement <4 x float> %16, i32 2 + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %19 = extractelement <4 x float> %18, i32 2 + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %21 = extractelement <4 x float> %20, i32 2 + %22 = fcmp ult float %17, 0.000000e+00 + %23 = select i1 %22, float %19, float %21 + %24 = load <4 x float> addrspace(8)* null + %25 = extractelement <4 x float> %24, i32 3 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %27 = extractelement <4 x float> %26, i32 3 + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %29 = extractelement <4 x float> %28, i32 3 + %30 = fcmp ult float %25, 0.000000e+00 + %31 = select i1 %30, float %27, float %29 + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00) + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00) + %36 = insertelement <4 x float> undef, float %32, i32 0 + %37 = insertelement <4 x float> %36, float %33, i32 1 + %38 = insertelement <4 x float> %37, float %34, i32 2 + %39 = insertelement <4 x float> %38, float %35, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) + ret void +} + +; CHECK: @main2 +; CHECK-NOT: MOV +define void @main2() { +main_body: + %0 = load <4 x float> addrspace(8)* null + %1 = extractelement <4 x float> %0, i32 0 + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %3 = extractelement <4 x float> %2, i32 0 + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %5 = extractelement <4 x float> %4, i32 1 + %6 = fcmp ult float %1, 0.000000e+00 + %7 = select i1 %6, float %3, float %5 + %8 = load <4 x float> addrspace(8)* null + %9 = extractelement <4 x float> %8, i32 1 + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %11 = extractelement <4 x float> %10, i32 0 + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %13 = extractelement <4 x float> %12, i32 1 + %14 = fcmp ult float %9, 0.000000e+00 + %15 = select i1 %14, float %11, float %13 + %16 = load <4 x float> addrspace(8)* null + %17 = extractelement <4 x float> %16, i32 2 + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %19 = extractelement <4 x float> %18, i32 3 + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %21 = extractelement <4 x float> %20, i32 2 + %22 = fcmp ult float %17, 0.000000e+00 + %23 = select i1 %22, float %19, float %21 + %24 = load <4 x float> addrspace(8)* null + %25 = extractelement <4 x float> %24, i32 3 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %27 = extractelement <4 x float> %26, i32 3 + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %29 = extractelement <4 x float> %28, i32 2 + %30 = fcmp ult float %25, 0.000000e+00 + %31 = select i1 %30, float %27, float %29 + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00) + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00) + %36 = insertelement <4 x float> undef, float %32, i32 0 + %37 = insertelement <4 x float> %36, float %33, i32 1 + %38 = insertelement <4 x float> %37, float %34, i32 2 + %39 = insertelement <4 x float> %38, float %35, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) + ret void +} + +declare float @llvm.AMDIL.clamp.(float, float, float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll new file mode 100644 index 000000000000..1aae7f9f91f4 --- /dev/null +++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This tests a bug where LegalizeDAG was not checking the target's +; BooleanContents value and always using one for true, when expanding +; setcc to select_cc. +; +; This bug caused the icmp IR instruction to be expanded to two machine +; instructions, when only one is needed. +; + +; CHECK: @setcc_expand +; CHECK: SET +; CHECK-NOT: CND +define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp eq i32 %in, 5 + br i1 %0, label %IF, label %ENDIF +IF: + %1 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %1 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg new file mode 100644 index 000000000000..36ee493e5945 --- /dev/null +++ b/test/CodeGen/R600/lit.local.cfg @@ -0,0 +1,13 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +targets = set(root.targets_to_build.split()) +if not 'R600' in targets: + config.unsupported = True + diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll new file mode 100644 index 000000000000..e69f64e0e142 --- /dev/null +++ b/test/CodeGen/R600/literals.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Test using an integer literal constant. +; Generated ASM should be: +; ADD_INT REG literal.x, 5 +; or +; ADD_INT literal.x REG, 5 + +; CHECK: @i32_literal +; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5 +define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = add i32 5, %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; Test using a float literal constant. +; Generated ASM should be: +; ADD REG literal.x, 5.0 +; or +; ADD literal.x REG, 5.0 + +; CHECK: @float_literal +; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0 +define void @float_literal(float addrspace(1)* %out, float %in) { +entry: + %0 = fadd float 5.0, %in + store float %0, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll new file mode 100644 index 000000000000..693eb27457c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -0,0 +1,17 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.mul(float ,float ) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll new file mode 100644 index 000000000000..74331fa26934 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll @@ -0,0 +1,42 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16 + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %addr = load <4 x float> addrspace(1)* %in + %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1) + %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2) + %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3) + %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res3, i32 0, i32 0, i32 4) + %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res4, i32 0, i32 0, i32 5) + %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res5, i32 0, i32 0, i32 6) + %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res6, i32 0, i32 0, i32 7) + %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res7, i32 0, i32 0, i32 8) + %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res8, i32 0, i32 0, i32 9) + %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res9, i32 0, i32 0, i32 10) + %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res10, i32 0, i32 0, i32 11) + %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res11, i32 0, i32 0, i32 12) + %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res12, i32 0, i32 0, i32 13) + %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res13, i32 0, i32 0, i32 14) + %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res14, i32 0, i32 0, i32 15) + %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res15, i32 0, i32 0, i32 16) + store <4 x float> %res16, <4 x float> addrspace(1)* %out + ret void +} + +declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll new file mode 100644 index 000000000000..fac957f7eeec --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.AMDGPU.trunc( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.trunc(float ) readnone diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll new file mode 100644 index 000000000000..bf0cdaa2fa3a --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: S_MOV_B32 +;CHECK-NEXT: V_INTERP_MOV_F32 + +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { +main_body: + %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3) + %5 = call i32 @llvm.SI.packf16(float %4, float %4) + %6 = bitcast i32 %5 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6) + ret void +} + +declare void @llvm.AMDGPU.shader.type(i32) + +declare float @llvm.SI.fs.constant(i32, i32, i32) readnone + +declare i32 @llvm.SI.packf16(float, float) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll new file mode 100644 index 000000000000..c724395b98c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -0,0 +1,106 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE + +define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { + %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 + %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1 + %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2 + %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0 + %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1 + %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2 + %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3 + %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0 + %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1 + %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2 + %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3 + %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0 + %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1 + %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2 + %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3 + %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v1, + <8 x i32> undef, <4 x i32> undef, i32 1) + %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v2, + <8 x i32> undef, <4 x i32> undef, i32 2) + %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v3, + <8 x i32> undef, <4 x i32> undef, i32 3) + %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v4, + <8 x i32> undef, <4 x i32> undef, i32 4) + %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v5, + <8 x i32> undef, <4 x i32> undef, i32 5) + %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v6, + <8 x i32> undef, <4 x i32> undef, i32 6) + %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v7, + <8 x i32> undef, <4 x i32> undef, i32 7) + %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v8, + <8 x i32> undef, <4 x i32> undef, i32 8) + %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v9, + <8 x i32> undef, <4 x i32> undef, i32 9) + %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v10, + <8 x i32> undef, <4 x i32> undef, i32 10) + %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v11, + <8 x i32> undef, <4 x i32> undef, i32 11) + %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v12, + <8 x i32> undef, <4 x i32> undef, i32 12) + %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v13, + <8 x i32> undef, <4 x i32> undef, i32 13) + %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v14, + <8 x i32> undef, <4 x i32> undef, i32 14) + %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v15, + <8 x i32> undef, <4 x i32> undef, i32 15) + %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v16, + <8 x i32> undef, <4 x i32> undef, i32 16) + %e1 = extractelement <4 x float> %res1, i32 0 + %e2 = extractelement <4 x float> %res2, i32 0 + %e3 = extractelement <4 x float> %res3, i32 0 + %e4 = extractelement <4 x float> %res4, i32 0 + %e5 = extractelement <4 x float> %res5, i32 0 + %e6 = extractelement <4 x float> %res6, i32 0 + %e7 = extractelement <4 x float> %res7, i32 0 + %e8 = extractelement <4 x float> %res8, i32 0 + %e9 = extractelement <4 x float> %res9, i32 0 + %e10 = extractelement <4 x float> %res10, i32 0 + %e11 = extractelement <4 x float> %res11, i32 0 + %e12 = extractelement <4 x float> %res12, i32 0 + %e13 = extractelement <4 x float> %res13, i32 0 + %e14 = extractelement <4 x float> %res14, i32 0 + %e15 = extractelement <4 x float> %res15, i32 0 + %e16 = extractelement <4 x float> %res16, i32 0 + %s1 = fadd float %e1, %e2 + %s2 = fadd float %s1, %e3 + %s3 = fadd float %s2, %e4 + %s4 = fadd float %s3, %e5 + %s5 = fadd float %s4, %e6 + %s6 = fadd float %s5, %e7 + %s7 = fadd float %s6, %e8 + %s8 = fadd float %s7, %e9 + %s9 = fadd float %s8, %e10 + %s10 = fadd float %s9, %e11 + %s11 = fadd float %s10, %e12 + %s12 = fadd float %s11, %e13 + %s13 = fadd float %s12, %e14 + %s14 = fadd float %s13, %e15 + %s15 = fadd float %s14, %e16 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll new file mode 100644 index 000000000000..dc120bfb00c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.cos.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.cos.f32(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.cos.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll new file mode 100644 index 000000000000..b4ce9f429f16 --- /dev/null +++ b/test/CodeGen/R600/llvm.pow.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.pow.f32( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.pow.f32(float ,float ) readonly diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll new file mode 100644 index 000000000000..5cd6998c9370 --- /dev/null +++ b/test/CodeGen/R600/llvm.sin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.sin.f32( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.sin.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll new file mode 100644 index 000000000000..93627283bb94 --- /dev/null +++ b/test/CodeGen/R600/load.constant_addrspace.f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(float addrspace(1)* %out, float addrspace(2)* %in) { + %1 = load float addrspace(2)* %in + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll new file mode 100644 index 000000000000..b070dcd52049 --- /dev/null +++ b/test/CodeGen/R600/load.i8.ll @@ -0,0 +1,10 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = zext i8 %1 to i32 + store i32 %2, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll new file mode 100644 index 000000000000..423adb9da900 --- /dev/null +++ b/test/CodeGen/R600/lshl.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = mul i32 %p, 2 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll new file mode 100644 index 000000000000..551eac1d76bf --- /dev/null +++ b/test/CodeGen/R600/lshr.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = udiv i32 %p, 2 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll new file mode 100644 index 000000000000..28744e00c3cf --- /dev/null +++ b/test/CodeGen/R600/mulhu.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 +;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 +;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = udiv i32 %p, 3 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll new file mode 100644 index 000000000000..eb8b052b6f72 --- /dev/null +++ b/test/CodeGen/R600/predicates.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests make sure the compiler is optimizing branches using predicates +; when it is legal to do so. + +; CHECK: @simple_if +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +define void @simple_if(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF, label %ENDIF + +IF: + %1 = shl i32 %in, 1 + br label %ENDIF + +ENDIF: + %2 = phi i32 [ %in, %entry ], [ %1, %IF ] + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; CHECK: @simple_if_else +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF, label %ELSE + +IF: + %1 = shl i32 %in, 1 + br label %ENDIF + +ELSE: + %2 = lshr i32 %in, 1 + br label %ENDIF + +ENDIF: + %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ] + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @nested_if +; CHECK: ALU_PUSH_BEFORE +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec +; CHECK: JUMP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: POP +define void @nested_if(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF0, label %ENDIF + +IF0: + %1 = add i32 %in, 10 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %IF1, label %ENDIF + +IF1: + %3 = shl i32 %1, 1 + br label %ENDIF + +ENDIF: + %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1] + store i32 %4, i32 addrspace(1)* %out + ret void +} + +; CHECK: @nested_if_else +; CHECK: ALU_PUSH_BEFORE +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec +; CHECK: JUMP +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: POP +define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF0, label %ENDIF + +IF0: + %1 = add i32 %in, 10 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %IF1, label %ELSE1 + +IF1: + %3 = shl i32 %1, 1 + br label %ENDIF + +ELSE1: + %4 = lshr i32 %in, 1 + br label %ENDIF + +ENDIF: + %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1] + store i32 %5, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll new file mode 100644 index 000000000000..6838c1ae3662 --- /dev/null +++ b/test/CodeGen/R600/reciprocal.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = fdiv float 1.0, %r0 + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.rcp(float ) readnone diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll new file mode 100644 index 000000000000..ba9620c40a49 --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll @@ -0,0 +1,83 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = call float @llvm.R600.interp.input(i32 0, i32 0) + %1 = call float @llvm.R600.interp.input(i32 1, i32 0) + %2 = call float @llvm.R600.interp.input(i32 2, i32 0) + %3 = call float @llvm.R600.interp.input(i32 3, i32 0) + %4 = fcmp ult float %1, 0.000000e+00 + %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 + %6 = fsub float -0.000000e+00, %5 + %7 = fptosi float %6 to i32 + %8 = bitcast i32 %7 to float + %9 = fcmp ult float %0, 5.700000e+01 + %10 = select i1 %9, float 1.000000e+00, float 0.000000e+00 + %11 = fsub float -0.000000e+00, %10 + %12 = fptosi float %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %8 to i32 + %15 = bitcast float %13 to i32 + %16 = and i32 %14, %15 + %17 = bitcast i32 %16 to float + %18 = bitcast float %17 to i32 + %19 = icmp ne i32 %18, 0 + %20 = fcmp ult float %0, 0.000000e+00 + %21 = select i1 %20, float 1.000000e+00, float 0.000000e+00 + %22 = fsub float -0.000000e+00, %21 + %23 = fptosi float %22 to i32 + %24 = bitcast i32 %23 to float + %25 = bitcast float %24 to i32 + %26 = icmp ne i32 %25, 0 + br i1 %19, label %IF, label %ELSE + +IF: ; preds = %main_body + %. = select i1 %26, float 0.000000e+00, float 1.000000e+00 + %.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00 + br label %ENDIF + +ELSE: ; preds = %main_body + br i1 %26, label %ENDIF, label %ELSE17 + +ENDIF: ; preds = %ELSE17, %ELSE, %IF + %temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ] + %temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ] + %temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ] + %27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00) + %28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00) + %29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00) + %30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %31 = insertelement <4 x float> undef, float %27, i32 0 + %32 = insertelement <4 x float> %31, float %28, i32 1 + %33 = insertelement <4 x float> %32, float %29, i32 2 + %34 = insertelement <4 x float> %33, float %30, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0) + ret void + +ELSE17: ; preds = %ELSE + %35 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %36 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %37 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %38 = fadd float %35, 0x3FC99999A0000000 + %39 = fadd float %36, 0x3FC99999A0000000 + %40 = fadd float %37, 0x3FC99999A0000000 + %41 = fadd float %38, 0x3FC99999A0000000 + %42 = fadd float %39, 0x3FC99999A0000000 + %43 = fadd float %40, 0x3FC99999A0000000 + %44 = fadd float %41, 0x3FC99999A0000000 + %45 = fadd float %42, 0x3FC99999A0000000 + %46 = fadd float %43, 0x3FC99999A0000000 + %47 = fadd float %44, 0x3FC99999A0000000 + %48 = fadd float %45, 0x3FC99999A0000000 + %49 = fadd float %46, 0x3FC99999A0000000 + br label %ENDIF +} + +declare float @llvm.R600.interp.input(i32, i32) #0 + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll new file mode 100644 index 000000000000..5e875c49ab51 --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll @@ -0,0 +1,88 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(9)* null + %1 = extractelement <4 x float> %0, i32 3 + %2 = fptosi float %1 to i32 + %3 = bitcast i32 %2 to float + %4 = bitcast float %3 to i32 + %5 = sdiv i32 %4, 4 + %6 = bitcast i32 %5 to float + %7 = bitcast float %6 to i32 + %8 = mul i32 %7, 4 + %9 = bitcast i32 %8 to float + %10 = bitcast float %9 to i32 + %11 = sub i32 0, %10 + %12 = bitcast i32 %11 to float + %13 = bitcast float %3 to i32 + %14 = bitcast float %12 to i32 + %15 = add i32 %13, %14 + %16 = bitcast i32 %15 to float + %17 = load <4 x float> addrspace(9)* null + %18 = extractelement <4 x float> %17, i32 0 + %19 = load <4 x float> addrspace(9)* null + %20 = extractelement <4 x float> %19, i32 1 + %21 = load <4 x float> addrspace(9)* null + %22 = extractelement <4 x float> %21, i32 2 + br label %LOOP + +LOOP: ; preds = %IF31, %main_body + %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ] + %temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ] + %temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ] + %temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ] + %23 = bitcast float %temp12.0 to i32 + %24 = bitcast float %6 to i32 + %25 = icmp sge i32 %23, %24 + %26 = sext i1 %25 to i32 + %27 = bitcast i32 %26 to float + %28 = bitcast float %27 to i32 + %29 = icmp ne i32 %28, 0 + br i1 %29, label %IF, label %LOOP29 + +IF: ; preds = %LOOP + %30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) + %31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00) + %32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %34 = insertelement <4 x float> undef, float %30, i32 0 + %35 = insertelement <4 x float> %34, float %31, i32 1 + %36 = insertelement <4 x float> %35, float %32, i32 2 + %37 = insertelement <4 x float> %36, float %33, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0) + ret void + +LOOP29: ; preds = %LOOP, %ENDIF30 + %temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ] + %temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ] + %temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ] + %temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ] + %38 = bitcast float %temp20.0 to i32 + %39 = bitcast float %16 to i32 + %40 = icmp sge i32 %38, %39 + %41 = sext i1 %40 to i32 + %42 = bitcast i32 %41 to float + %43 = bitcast float %42 to i32 + %44 = icmp ne i32 %43, 0 + br i1 %44, label %IF31, label %ENDIF30 + +IF31: ; preds = %LOOP29 + %45 = bitcast float %temp12.0 to i32 + %46 = add i32 %45, 1 + %47 = bitcast i32 %46 to float + br label %LOOP + +ENDIF30: ; preds = %LOOP29 + %48 = bitcast float %temp20.0 to i32 + %49 = add i32 %48, 1 + %50 = bitcast i32 %49 to float + br label %LOOP29 +} + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll new file mode 100644 index 000000000000..d142cacd4335 --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop.ll @@ -0,0 +1,55 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(9)* null + %1 = extractelement <4 x float> %0, i32 3 + %2 = fptosi float %1 to i32 + %3 = bitcast i32 %2 to float + %4 = load <4 x float> addrspace(9)* null + %5 = extractelement <4 x float> %4, i32 0 + %6 = load <4 x float> addrspace(9)* null + %7 = extractelement <4 x float> %6, i32 1 + %8 = load <4 x float> addrspace(9)* null + %9 = extractelement <4 x float> %8, i32 2 + br label %LOOP + +LOOP: ; preds = %ENDIF, %main_body + %temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ] + %temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ] + %temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ] + %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ] + %10 = bitcast float %temp8.0 to i32 + %11 = bitcast float %3 to i32 + %12 = icmp sge i32 %10, %11 + %13 = sext i1 %12 to i32 + %14 = bitcast i32 %13 to float + %15 = bitcast float %14 to i32 + %16 = icmp ne i32 %15, 0 + br i1 %16, label %IF, label %ENDIF + +IF: ; preds = %LOOP + %17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) + %18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00) + %19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00) + %20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %21 = insertelement <4 x float> undef, float %17, i32 0 + %22 = insertelement <4 x float> %21, float %18, i32 1 + %23 = insertelement <4 x float> %22, float %19, i32 2 + %24 = insertelement <4 x float> %23, float %20, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0) + ret void + +ENDIF: ; preds = %LOOP + %25 = bitcast float %temp8.0 to i32 + %26 = add i32 %25, 1 + %27 = bitcast i32 %26 to float + br label %LOOP +} + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll new file mode 100644 index 000000000000..6afd6772926b --- /dev/null +++ b/test/CodeGen/R600/schedule-if-2.ll @@ -0,0 +1,94 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %1 = extractelement <4 x float> %0, i32 0 + %2 = fadd float 1.000000e+03, %1 + %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %4 = extractelement <4 x float> %3, i32 0 + %5 = bitcast float %4 to i32 + %6 = icmp eq i32 %5, 0 + %7 = sext i1 %6 to i32 + %8 = bitcast i32 %7 to float + %9 = bitcast float %8 to i32 + %10 = icmp ne i32 %9, 0 + br i1 %10, label %IF, label %ELSE + +IF: ; preds = %main_body + %11 = call float @fabs(float %2) + %12 = fcmp ueq float %11, 0x7FF0000000000000 + %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00 + %14 = fsub float -0.000000e+00, %13 + %15 = fptosi float %14 to i32 + %16 = bitcast i32 %15 to float + %17 = bitcast float %16 to i32 + %18 = icmp ne i32 %17, 0 + %. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00 + %19 = fcmp une float %2, %2 + %20 = select i1 %19, float 1.000000e+00, float 0.000000e+00 + %21 = fsub float -0.000000e+00, %20 + %22 = fptosi float %21 to i32 + %23 = bitcast i32 %22 to float + %24 = bitcast float %23 to i32 + %25 = icmp ne i32 %24, 0 + %temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00 + %26 = bitcast float %. to i32 + %27 = sitofp i32 %26 to float + %28 = bitcast float %temp8.0 to i32 + %29 = sitofp i32 %28 to float + %30 = fcmp ugt float %2, 0.000000e+00 + %31 = select i1 %30, float 1.000000e+00, float %2 + %32 = fcmp uge float %31, 0.000000e+00 + %33 = select i1 %32, float %31, float -1.000000e+00 + %34 = fadd float %33, 1.000000e+00 + %35 = fmul float %34, 5.000000e-01 + br label %ENDIF + +ELSE: ; preds = %main_body + %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %37 = extractelement <4 x float> %36, i32 0 + %38 = bitcast float %37 to i32 + %39 = icmp eq i32 %38, 1 + %40 = sext i1 %39 to i32 + %41 = bitcast i32 %40 to float + %42 = bitcast float %41 to i32 + %43 = icmp ne i32 %42, 0 + br i1 %43, label %IF23, label %ENDIF + +ENDIF: ; preds = %IF23, %ELSE, %IF + %temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ] + %temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ] + %temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ] + %temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ] + %44 = insertelement <4 x float> undef, float %temp4.0, i32 0 + %45 = insertelement <4 x float> %44, float %temp5.0, i32 1 + %46 = insertelement <4 x float> %45, float %temp6.0, i32 2 + %47 = insertelement <4 x float> %46, float %temp7.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0) + ret void + +IF23: ; preds = %ELSE + %48 = fcmp ult float 0.000000e+00, %2 + %49 = select i1 %48, float 1.000000e+00, float 0.000000e+00 + %50 = fsub float -0.000000e+00, %49 + %51 = fptosi float %50 to i32 + %52 = bitcast i32 %51 to float + %53 = bitcast float %52 to i32 + %54 = icmp ne i32 %53, 0 + %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00 + %55 = bitcast float %.28 to i32 + %56 = sitofp i32 %55 to float + %57 = load <4 x float> addrspace(8)* null + %58 = extractelement <4 x float> %57, i32 0 + %59 = fsub float -0.000000e+00, %58 + %60 = fadd float %2, %59 + br label %ENDIF +} + +declare float @fabs(float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readonly } diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll new file mode 100644 index 000000000000..347d92fd6a0e --- /dev/null +++ b/test/CodeGen/R600/schedule-if.ll @@ -0,0 +1,46 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %1 = extractelement <4 x float> %0, i32 0 + %2 = bitcast float %1 to i32 + %3 = icmp eq i32 %2, 0 + %4 = sext i1 %3 to i32 + %5 = bitcast i32 %4 to float + %6 = bitcast float %5 to i32 + %7 = icmp ne i32 %6, 0 + br i1 %7, label %ENDIF, label %ELSE + +ELSE: ; preds = %main_body + %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %9 = extractelement <4 x float> %8, i32 0 + %10 = bitcast float %9 to i32 + %11 = icmp eq i32 %10, 1 + %12 = sext i1 %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %13 to i32 + %15 = icmp ne i32 %14, 0 + br i1 %15, label %IF13, label %ENDIF + +ENDIF: ; preds = %IF13, %ELSE, %main_body + %temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp3.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %16 = insertelement <4 x float> undef, float %temp.0, i32 0 + %17 = insertelement <4 x float> %16, float %temp1.0, i32 1 + %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2 + %19 = insertelement <4 x float> %18, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0) + ret void + +IF13: ; preds = %ELSE + %20 = load <4 x float> addrspace(8)* null + %21 = extractelement <4 x float> %20, i32 0 + %22 = fsub float -0.000000e+00, %21 + %23 = fadd float 1.000000e+03, %22 + br label %ENDIF +} + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll new file mode 100644 index 000000000000..44b7c2f68002 --- /dev/null +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll @@ -0,0 +1,134 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = fcmp ult float %0, 0.000000e+00 + %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 + %6 = fsub float -0.000000e+00, %5 + %7 = fptosi float %6 to i32 + %8 = bitcast i32 %7 to float + %9 = bitcast float %8 to i32 + %10 = icmp ne i32 %9, 0 + br i1 %10, label %LOOP, label %ENDIF + +ENDIF: ; preds = %ENDIF16, %LOOP, %main_body + %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ] + %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ] + %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ] + %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ] + %11 = load <4 x float> addrspace(9)* null + %12 = extractelement <4 x float> %11, i32 0 + %13 = fmul float %12, %0 + %14 = load <4 x float> addrspace(9)* null + %15 = extractelement <4 x float> %14, i32 1 + %16 = fmul float %15, %0 + %17 = load <4 x float> addrspace(9)* null + %18 = extractelement <4 x float> %17, i32 2 + %19 = fmul float %18, %0 + %20 = load <4 x float> addrspace(9)* null + %21 = extractelement <4 x float> %20, i32 3 + %22 = fmul float %21, %0 + %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %24 = extractelement <4 x float> %23, i32 0 + %25 = fmul float %24, %1 + %26 = fadd float %25, %13 + %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %28 = extractelement <4 x float> %27, i32 1 + %29 = fmul float %28, %1 + %30 = fadd float %29, %16 + %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %32 = extractelement <4 x float> %31, i32 2 + %33 = fmul float %32, %1 + %34 = fadd float %33, %19 + %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %36 = extractelement <4 x float> %35, i32 3 + %37 = fmul float %36, %1 + %38 = fadd float %37, %22 + %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %40 = extractelement <4 x float> %39, i32 0 + %41 = fmul float %40, %2 + %42 = fadd float %41, %26 + %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %44 = extractelement <4 x float> %43, i32 1 + %45 = fmul float %44, %2 + %46 = fadd float %45, %30 + %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %48 = extractelement <4 x float> %47, i32 2 + %49 = fmul float %48, %2 + %50 = fadd float %49, %34 + %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %52 = extractelement <4 x float> %51, i32 3 + %53 = fmul float %52, %2 + %54 = fadd float %53, %38 + %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %56 = extractelement <4 x float> %55, i32 0 + %57 = fmul float %56, %3 + %58 = fadd float %57, %42 + %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %60 = extractelement <4 x float> %59, i32 1 + %61 = fmul float %60, %3 + %62 = fadd float %61, %46 + %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %64 = extractelement <4 x float> %63, i32 2 + %65 = fmul float %64, %3 + %66 = fadd float %65, %50 + %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %68 = extractelement <4 x float> %67, i32 3 + %69 = fmul float %68, %3 + %70 = fadd float %69, %54 + %71 = insertelement <4 x float> undef, float %58, i32 0 + %72 = insertelement <4 x float> %71, float %62, i32 1 + %73 = insertelement <4 x float> %72, float %66, i32 2 + %74 = insertelement <4 x float> %73, float %70, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1) + %75 = insertelement <4 x float> undef, float %temp.0, i32 0 + %76 = insertelement <4 x float> %75, float %temp1.0, i32 1 + %77 = insertelement <4 x float> %76, float %temp2.0, i32 2 + %78 = insertelement <4 x float> %77, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2) + ret void + +LOOP: ; preds = %main_body, %ENDIF19 + %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ] + %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ] + %79 = fcmp uge float %temp4.0, %0 + %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00 + %81 = fsub float -0.000000e+00, %80 + %82 = fptosi float %81 to i32 + %83 = bitcast i32 %82 to float + %84 = bitcast float %83 to i32 + %85 = icmp ne i32 %84, 0 + br i1 %85, label %ENDIF, label %ENDIF16 + +ENDIF16: ; preds = %LOOP + %86 = fcmp une float %2, %temp4.0 + %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00 + %88 = fsub float -0.000000e+00, %87 + %89 = fptosi float %88 to i32 + %90 = bitcast i32 %89 to float + %91 = bitcast float %90 to i32 + %92 = icmp ne i32 %91, 0 + br i1 %92, label %ENDIF, label %ENDIF19 + +ENDIF19: ; preds = %ENDIF16 + %93 = fadd float %temp.1, 1.000000e+00 + %94 = fadd float %temp1.1, 0.000000e+00 + %95 = fadd float %temp2.1, 0.000000e+00 + %96 = fadd float %temp3.1, 0.000000e+00 + %97 = fadd float %temp4.0, 1.000000e+00 + br label %LOOP +} + +declare float @llvm.R600.load.input(i32) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll new file mode 100644 index 000000000000..3556facfbab3 --- /dev/null +++ b/test/CodeGen/R600/sdiv.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; The code generated by sdiv is long and complex and may frequently change. +; The goal of this test is to make sure the ISel doesn't fail. +; +; This program was previously failing to compile when one of the selectcc +; opcodes generated by the sdiv lowering was being legalized and optimized to: +; selectcc Remainder -1, 0, -1, SETGT +; This was fixed by adding an additional pattern in R600Instructions.td to +; match this pattern with a CNDGE_INT. + +; CHECK: RETURN + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %num = load i32 addrspace(1) * %in + %den = load i32 addrspace(1) * %den_ptr + %result = sdiv i32 %num, %den + store i32 %result, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll new file mode 100644 index 000000000000..359ca1e6f8ce --- /dev/null +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Note additional optimizations may cause this SGT to be replaced with a +; CND* instruction. +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} +; Test a selectcc with i32 LHS/RHS and float True/False + +define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %1 = icmp sge i32 %0, 0 + %2 = select i1 %1, float 1.0, float 0.0 + store float %2, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll new file mode 100644 index 000000000000..02d935390423 --- /dev/null +++ b/test/CodeGen/R600/selectcc-opt.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @test_a +; CHECK-NOT: CND +; CHECK: SET{{[NEQGTL]+}}_DX10 + +define void @test_a(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 0.000000e+00 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + %4 = bitcast i32 %3 to float + %5 = bitcast float %4 to i32 + %6 = icmp ne i32 %5, 0 + br i1 %6, label %IF, label %ENDIF + +IF: + %7 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %7 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; Same as test_a, but the branch labels are swapped to produce the inverse cc +; for the icmp instruction + +; CHECK: @test_b +; CHECK: SET{{[GTEQN]+}}_DX10 +; CHECK-NEXT: PRED_ +define void @test_b(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 0.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + %4 = bitcast i32 %3 to float + %5 = bitcast float %4 to i32 + %6 = icmp ne i32 %5, 0 + br i1 %6, label %ENDIF, label %IF + +IF: + %7 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %7 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; Test a CND*_INT instruction with float true/false values +; CHECK: @test_c +; CHECK: CND{{[GTE]+}}_INT +define void @test_c(float addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + %1 = select i1 %0, float 2.0, float 3.0 + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll new file mode 100644 index 000000000000..f0a0f512ba15 --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} +define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { + %1 = load float addrspace(1)* %in + %2 = fcmp oeq float %1, 0.0 + %3 = select i1 %2, float 1.0, float 2.0 + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll new file mode 100644 index 000000000000..b38078e26db6 --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde_int.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE_INT +;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %1 = load i32 addrspace(1)* %in + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 1, i32 2 + store i32 %3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll new file mode 100644 index 000000000000..54febcf0e68e --- /dev/null +++ b/test/CodeGen/R600/set-dx10.ll @@ -0,0 +1,137 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests check that floating point comparisons which are used by select +; to store integer true (-1) and false (0) values are lowered to one of the +; SET*DX10 instructions. + +; CHECK: @fcmp_une_select_fptosi +; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp une float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_une_select_i32 +; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp une float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ueq_select_fptosi +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ueq float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ueq_select_i32 +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ueq float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ugt_select_fptosi +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ugt float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ugt_select_i32 +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ugt float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_uge_select_fptosi +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp uge float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_uge_select_i32 +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp uge float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ule_select_fptosi +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ule_select_i32 +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ult_select_fptosi +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ult_select_i32 +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.v4i32.ll new file mode 100644 index 000000000000..0752f2e63dbf --- /dev/null +++ b/test/CodeGen/R600/setcc.v4i32.ll @@ -0,0 +1,12 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = icmp eq <4 x i32> %a, %b + %sext = sext <4 x i1> %result to <4 x i32> + store <4 x i32> %sext, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll new file mode 100644 index 000000000000..5ab4b87d570c --- /dev/null +++ b/test/CodeGen/R600/seto.ll @@ -0,0 +1,13 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 + +define void @main(float %p) { +main_body: + %c = fcmp oeq float %p, %p + %r = select i1 %c, float 1.000000e+00, float 0.000000e+00 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll new file mode 100644 index 000000000000..320835576d41 --- /dev/null +++ b/test/CodeGen/R600/setuo.ll @@ -0,0 +1,13 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 + +define void @main(float %p) { +main_body: + %c = fcmp une float %p, %p + %r = select i1 %c, float 1.000000e+00, float 0.000000e+00 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll new file mode 100644 index 000000000000..b69e327bf6df --- /dev/null +++ b/test/CodeGen/R600/short-args.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @i8_arg +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i8_zext_arg +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i16_arg +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i16_zext_arg +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll new file mode 100644 index 000000000000..8b0d24445971 --- /dev/null +++ b/test/CodeGen/R600/store.v4f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %1 = load <4 x float> addrspace(1) * %in + store <4 x float> %1, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll new file mode 100644 index 000000000000..a659815ddeba --- /dev/null +++ b/test/CodeGen/R600/store.v4i32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %1 = load <4 x i32> addrspace(1) * %in + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.v4i32.ll new file mode 100644 index 000000000000..47657a6be75e --- /dev/null +++ b/test/CodeGen/R600/udiv.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by udiv is long and complex and may frequently change. +;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 udiv +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = udiv <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll new file mode 100644 index 000000000000..b48c59151831 --- /dev/null +++ b/test/CodeGen/R600/unsupported-cc.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests are for condition codes that are not supported by the hardware + +; CHECK: @slt +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +define void @slt(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp slt i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ult_i32 +; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp ult i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ult_float +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ult_float(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @olt +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @olt(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp olt float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @sle +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +define void @sle(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sle i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ule_i32 +; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp ule i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ule_float +; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ule_float(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @ole +; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ole(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ole float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.v4i32.ll new file mode 100644 index 000000000000..2e7388caa6ce --- /dev/null +++ b/test/CodeGen/R600/urem.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by urem is long and complex and may frequently change. +;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 urem +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = urem <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll new file mode 100644 index 000000000000..8f62bc692908 --- /dev/null +++ b/test/CodeGen/R600/vec4-expand.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fp_to_sint +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptosi <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @fp_to_uint +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptoui <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @sint_to_fp +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = sitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; CHECK: @uint_to_fp +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = uitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} |