diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /test/CodeGen/X86/vector-reduce-or.ll | |
parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
download | src-d8e91e46262bc44006913e6796843909f1ac7bcd.tar.gz src-d8e91e46262bc44006913e6796843909f1ac7bcd.zip |
Vendor import of llvm trunk r351319 (just before the release_80 branchvendor/llvm/llvm-trunk-r351319
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=343171
svn path=/vendor/llvm/llvm-trunk-r351319/; revision=343172; tag=vendor/llvm/llvm-trunk-r351319
Diffstat (limited to 'test/CodeGen/X86/vector-reduce-or.ll')
-rw-r--r-- | test/CodeGen/X86/vector-reduce-or.ll | 313 |
1 files changed, 270 insertions, 43 deletions
diff --git a/test/CodeGen/X86/vector-reduce-or.ll b/test/CodeGen/X86/vector-reduce-or.ll index 04ec6cfc970e..1cb6af66bf46 100644 --- a/test/CodeGen/X86/vector-reduce-or.ll +++ b/test/CodeGen/X86/vector-reduce-or.ll @@ -49,7 +49,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -59,7 +59,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -69,7 +69,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -94,7 +94,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -105,7 +105,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -117,7 +117,7 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -148,7 +148,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -161,7 +161,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -174,7 +174,7 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -186,6 +186,31 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; vXi32 ; +define i32 @test_v2i32(<2 x i32> %a0) { +; SSE-LABEL: test_v2i32: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: retq +; +; AVX-LABEL: test_v2i32: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v2i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: retq + %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0) + ret i32 %1 +} + define i32 @test_v4i32(<4 x i32> %a0) { ; SSE-LABEL: test_v4i32: ; SSE: # %bb.0: @@ -235,7 +260,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -247,7 +272,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -259,7 +284,7 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -288,7 +313,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -301,7 +326,7 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -309,13 +334,13 @@ define i32 @test_v16i32(<16 x i32> %a0) { ; AVX512-LABEL: test_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -350,7 +375,7 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -365,22 +390,22 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v32i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -392,6 +417,68 @@ define i32 @test_v32i32(<32 x i32> %a0) { ; vXi16 ; +define i16 @test_v2i16(<2 x i16> %a0) { +; SSE-LABEL: test_v2i16: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: retq +; +; AVX-LABEL: test_v2i16: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: # kill: def $ax killed $ax killed $eax +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v2i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq + %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> %a0) + ret i16 %1 +} + +define i16 @test_v4i16(<4 x i16> %a0) { +; SSE-LABEL: test_v4i16: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SSE-NEXT: retq +; +; AVX-LABEL: test_v4i16: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: # kill: def $ax killed $ax killed $eax +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v4i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq + %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> %a0) + ret i16 %1 +} + define i16 @test_v8i16(<8 x i16> %a0) { ; SSE-LABEL: test_v8i16: ; SSE: # %bb.0: @@ -457,7 +544,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -472,7 +559,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -487,7 +574,7 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper @@ -523,7 +610,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -539,7 +626,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -556,7 +643,7 @@ define i16 @test_v32i16(<32 x i16> %a0) { ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper @@ -598,7 +685,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -616,7 +703,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper @@ -634,7 +721,7 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper @@ -647,6 +734,140 @@ define i16 @test_v64i16(<64 x i16> %a0) { ; vXi8 ; +define i8 @test_v2i8(<2 x i8> %a0) { +; SSE2-LABEL: test_v2i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v2i8: +; SSE41: # %bb.0: +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: # kill: def $al killed $al killed $eax +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v2i8: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v2i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> %a0) + ret i8 %1 +} + +define i8 @test_v4i8(<4 x i8> %a0) { +; SSE2-LABEL: test_v4i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v4i8: +; SSE41: # %bb.0: +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: # kill: def $al killed $al killed $eax +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v4i8: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v4i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> %a0) + ret i8 %1 +} + +define i8 @test_v8i8(<8 x i8> %a0) { +; SSE2-LABEL: test_v8i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrld $16, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: # kill: def $al killed $al killed $eax +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_v8i8: +; SSE41: # %bb.0: +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrld $16, %xmm1 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: # kill: def $al killed $al killed $eax +; SSE41-NEXT: retq +; +; AVX-LABEL: test_v8i8: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: # kill: def $al killed $al killed $eax +; AVX-NEXT: retq +; +; AVX512-LABEL: test_v8i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq + %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> %a0) + ret i8 %1 +} + define i8 @test_v16i8(<16 x i8> %a0) { ; SSE2-LABEL: test_v16i8: ; SSE2: # %bb.0: @@ -757,7 +978,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -774,7 +995,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -791,7 +1012,7 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -851,7 +1072,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -869,7 +1090,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -888,7 +1109,7 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -958,7 +1179,7 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -978,7 +1199,7 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -998,7 +1219,7 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 -; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -1012,16 +1233,22 @@ declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>) declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>) declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>) +declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>) declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>) declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>) declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>) declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>) +declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>) +declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>) declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>) declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>) declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>) declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>) +declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>) +declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>) declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>) declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>) declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>) |