author    Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2019-01-19 10:01:25 +0000
commit    d8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree      7d0c143d9b38190e0fa0180805389da22cd834c5 /test/CodeGen/X86/vector-reduce-or.ll
parent    b7eb8e35e481a74962664b63dfb09483b200209a (diff)
Vendor import of llvm trunk r351319 (just before the release_80 branch) (tag: vendor/llvm/llvm-trunk-r351319)
Notes:
    svn path=/vendor/llvm/dist/; revision=343171
    svn path=/vendor/llvm/llvm-trunk-r351319/; revision=343172; tag=vendor/llvm/llvm-trunk-r351319
Diffstat (limited to 'test/CodeGen/X86/vector-reduce-or.ll')
-rw-r--r--    test/CodeGen/X86/vector-reduce-or.ll    313
1 file changed, 270 insertions, 43 deletions
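
The diff below makes two kinds of changes to this FileCheck test: the final OR of each horizontal reduction sequence is now checked as an xmm-width instruction (e.g. vpor %xmm1, %xmm0, %xmm0 or vorps %xmm1, %xmm0, %xmm0) rather than a ymm/zmm-width one, and new test cases plus intrinsic declarations are added for the small vector types v2i32, v2i16, v4i16, v2i8, v4i8, and v8i8. As a minimal sketch of what the added tests exercise, one of the new cases has the following shape in the .ll source (the declaration and function names are taken verbatim from the diff body):

declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>)

define i32 @test_v2i32(<2 x i32> %a0) {
  ; OR-reduce the two i32 lanes of %a0 down to a single scalar.
  %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0)
  ret i32 %1
}

The CHECK lines in each hunk then pin down the shuffle-and-OR sequence llc emits for the SSE, AVX, AVX2, and AVX512 run lines.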
diff --git a/test/CodeGen/X86/vector-reduce-or.ll b/test/CodeGen/X86/vector-reduce-or.ll
index 04ec6cfc970e..1cb6af66bf46 100644
--- a/test/CodeGen/X86/vector-reduce-or.ll
+++ b/test/CodeGen/X86/vector-reduce-or.ll
@@ -49,7 +49,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -59,7 +59,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -69,7 +69,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -94,7 +94,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -105,7 +105,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -117,7 +117,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -148,7 +148,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -161,7 +161,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -174,7 +174,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -186,6 +186,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; vXi32
;
+define i32 @test_v2i32(<2 x i32> %a0) {
+; SSE-LABEL: test_v2i32:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v2i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v2i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: retq
+ %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0)
+ ret i32 %1
+}
+
define i32 @test_v4i32(<4 x i32> %a0) {
; SSE-LABEL: test_v4i32:
; SSE: # %bb.0:
@@ -235,7 +260,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -247,7 +272,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -259,7 +284,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -288,7 +313,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -301,7 +326,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -309,13 +334,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -350,7 +375,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -365,22 +390,22 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -392,6 +417,68 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; vXi16
;
+define i16 @test_v2i16(<2 x i16> %a0) {
+; SSE-LABEL: test_v2i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v2i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
+ %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> %a0)
+ ret i16 %1
+}
+
+define i16 @test_v4i16(<4 x i16> %a0) {
+; SSE-LABEL: test_v4i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v4i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v4i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
+ %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> %a0)
+ ret i16 %1
+}
+
define i16 @test_v8i16(<8 x i16> %a0) {
; SSE-LABEL: test_v8i16:
; SSE: # %bb.0:
@@ -457,7 +544,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -472,7 +559,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -487,7 +574,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -523,7 +610,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -539,7 +626,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -556,7 +643,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -598,7 +685,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -616,7 +703,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -634,7 +721,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -647,6 +734,140 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; vXi8
;
+define i8 @test_v2i8(<2 x i8> %a0) {
+; SSE2-LABEL: test_v2i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_v2i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: # kill: def $al killed $al killed $eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_v2i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v2i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
+ %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> %a0)
+ ret i8 %1
+}
+
+define i8 @test_v4i8(<4 x i8> %a0) {
+; SSE2-LABEL: test_v4i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_v4i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: # kill: def $al killed $al killed $eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_v4i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v4i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
+ %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> %a0)
+ ret i8 %1
+}
+
+define i8 @test_v8i8(<8 x i8> %a0) {
+; SSE2-LABEL: test_v8i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrld $16, %xmm1
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_v8i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrld $16, %xmm1
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pextrb $0, %xmm1, %eax
+; SSE41-NEXT: # kill: def $al killed $al killed $eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_v8i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: test_v8i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
+ %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> %a0)
+ ret i8 %1
+}
+
define i8 @test_v16i8(<16 x i8> %a0) {
; SSE2-LABEL: test_v16i8:
; SSE2: # %bb.0:
@@ -757,7 +978,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -774,7 +995,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -791,7 +1012,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -851,7 +1072,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -869,7 +1090,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -888,7 +1109,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -958,7 +1179,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -978,7 +1199,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -998,7 +1219,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -1012,16 +1233,22 @@ declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>)
+declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>)
+declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>)
+declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>)