Diffstat (limited to 'test/CodeGen/X86/horizontal-reduce-smin.ll')
 test/CodeGen/X86/horizontal-reduce-smin.ll | 284
 1 file changed, 121 insertions(+), 163 deletions(-)
diff --git a/test/CodeGen/X86/horizontal-reduce-smin.ll b/test/CodeGen/X86/horizontal-reduce-smin.ll
index f03e745598e6..fa5828a45700 100644
--- a/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -311,30 +311,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX-NEXT: retl
@@ -373,30 +368,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX-NEXT: retq
@@ -910,16 +900,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -928,14 +915,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -944,15 +929,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -998,16 +981,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1016,14 +996,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1032,15 +1010,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1049,15 +1025,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
@@ -1745,16 +1719,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT: psrld $16, %xmm1
-; X86-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT: psrlw $8, %xmm0
-; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE42-NEXT: psrlw $8, %xmm2
+; X86-SSE42-NEXT: pminub %xmm0, %xmm2
+; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X86-SSE42-NEXT: retl
@@ -1766,14 +1737,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX1-NEXT: vzeroupper
@@ -1783,15 +1752,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X86-AVX2-NEXT: vzeroupper
@@ -1849,16 +1816,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT: psrld $16, %xmm1
-; X64-SSE42-NEXT: pminsb %xmm0, %xmm1
-; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT: psrlw $8, %xmm0
-; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE42-NEXT: psrlw $8, %xmm2
+; X64-SSE42-NEXT: pminub %xmm0, %xmm2
+; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
+; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
; X64-SSE42-NEXT: retq
@@ -1870,14 +1834,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX1-NEXT: vzeroupper
@@ -1887,15 +1849,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX2-NEXT: vzeroupper
@@ -1904,17 +1864,15 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
+; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
; X64-AVX512-NEXT: vzeroupper
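
Note: every hunk above applies the same transformation. The old checks expected a generic log2(n) reduction ladder (pshufd/psrld/psrlw shuffles interleaved with pminsb). The new checks expect a shorter SSE4.1 idiom for the final v16i8 signed-min step: flip the sign bit of every byte (pxor with a 0x80 splat) so unsigned ordering matches signed ordering, fold each byte pair into the low byte of its word (psrlw $8 then pminub, which zeroes the high bytes), let phminposuw find the minimum word in a single instruction, then flip the sign bit back before pextrb. The wider tests (v32i8, v64i8) first narrow to one xmm register with pminsb and reuse the same tail; the AVX2/AVX512 hunks additionally fix the removed lines' ymm/zmm operands to xmm after the extract. A minimal C intrinsics sketch of the expected sequence, assuming SSE4.1 (the function name and scalar wrapper are illustrative, not part of the test):

    #include <smmintrin.h> /* SSE4.1: _mm_minpos_epu16, _mm_extract_epi8 */
    #include <stdint.h>

    /* Illustrative horizontal signed-min of 16 x i8, mirroring the new checks. */
    static int8_t reduce_smin_v16i8(__m128i v) {
        const __m128i sign = _mm_set1_epi8((char)0x80);
        /* pxor: bias bytes so unsigned comparison gives the signed order */
        v = _mm_xor_si128(v, sign);
        /* psrlw $8: bring the high byte of each 16-bit lane down; its high byte becomes 0 */
        __m128i hi = _mm_srli_epi16(v, 8);
        /* pminub: low byte of each word = min of the byte pair; high byte = min(x, 0) = 0 */
        v = _mm_min_epu8(v, hi);
        /* phminposuw: minimum of the 8 zero-extended words lands in element 0 */
        v = _mm_minpos_epu16(v);
        /* undo the bias and extract the low byte, as pextrb does in the checks */
        v = _mm_xor_si128(v, sign);
        return (int8_t)_mm_extract_epi8(v, 0);
    }

phminposuw is the only horizontal-min instruction in SSE and it operates on unsigned i16 only, so the bias (pxor) and widen (psrlw/pminub) steps adapt signed i8 data to it; that is why the new sequences need just two pxors, one shift, one pminub, and one phminposuw regardless of how many pminsb ladder steps they replace.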