diff options
Diffstat (limited to 'test/CodeGen/X86/horizontal-reduce-smin.ll')
-rw-r--r-- | test/CodeGen/X86/horizontal-reduce-smin.ll | 284 |
1 files changed, 121 insertions, 163 deletions
diff --git a/test/CodeGen/X86/horizontal-reduce-smin.ll b/test/CodeGen/X86/horizontal-reduce-smin.ll index f03e745598e6..fa5828a45700 100644 --- a/test/CodeGen/X86/horizontal-reduce-smin.ll +++ b/test/CodeGen/X86/horizontal-reduce-smin.ll @@ -311,30 +311,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) { ; ; X86-SSE42-LABEL: test_reduce_v16i8: ; X86-SSE42: ## %bb.0: -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE42-NEXT: psrld $16, %xmm1 -; X86-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 -; X86-SSE42-NEXT: psrlw $8, %xmm0 -; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 +; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE42-NEXT: psrlw $8, %xmm2 +; X86-SSE42-NEXT: pminub %xmm0, %xmm2 +; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 +; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax ; X86-SSE42-NEXT: retl ; ; X86-AVX-LABEL: test_reduce_v16i8: ; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 +; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax ; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax ; X86-AVX-NEXT: retl @@ -373,30 +368,25 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) { ; ; X64-SSE42-LABEL: test_reduce_v16i8: ; X64-SSE42: ## %bb.0: -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X64-SSE42-NEXT: psrld $16, %xmm1 -; X64-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 -; X64-SSE42-NEXT: psrlw $8, %xmm0 -; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 +; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE42-NEXT: psrlw $8, %xmm2 +; X64-SSE42-NEXT: pminub %xmm0, %xmm2 +; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 +; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax ; X64-SSE42-NEXT: retq ; ; X64-AVX-LABEL: test_reduce_v16i8: ; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 +; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax ; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax ; X64-AVX-NEXT: retq @@ -910,16 +900,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) { ; X86-SSE42-LABEL: test_reduce_v32i8: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE42-NEXT: psrld $16, %xmm1 -; X86-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 -; X86-SSE42-NEXT: psrlw $8, %xmm0 -; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 +; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE42-NEXT: psrlw $8, %xmm2 +; X86-SSE42-NEXT: pminub %xmm0, %xmm2 +; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 +; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax ; X86-SSE42-NEXT: retl @@ -928,14 +915,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) { ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax ; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax ; X86-AVX1-NEXT: vzeroupper @@ -944,15 +929,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) { ; X86-AVX2-LABEL: test_reduce_v32i8: ; X86-AVX2: ## %bb.0: ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax ; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax ; X86-AVX2-NEXT: vzeroupper @@ -998,16 +981,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) { ; X64-SSE42-LABEL: test_reduce_v32i8: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X64-SSE42-NEXT: psrld $16, %xmm1 -; X64-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 -; X64-SSE42-NEXT: psrlw $8, %xmm0 -; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 +; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE42-NEXT: psrlw $8, %xmm2 +; X64-SSE42-NEXT: pminub %xmm0, %xmm2 +; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 +; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax ; X64-SSE42-NEXT: retq @@ -1016,14 +996,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) { ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax ; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax ; X64-AVX1-NEXT: vzeroupper @@ -1032,15 +1010,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) { ; X64-AVX2-LABEL: test_reduce_v32i8: ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax ; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax ; X64-AVX2-NEXT: vzeroupper @@ -1049,15 +1025,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) { ; X64-AVX512-LABEL: test_reduce_v32i8: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 +; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax ; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax ; X64-AVX512-NEXT: vzeroupper @@ -1745,16 +1719,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { ; X86-SSE42-NEXT: pminsb %xmm3, %xmm1 ; X86-SSE42-NEXT: pminsb %xmm2, %xmm0 ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE42-NEXT: psrld $16, %xmm1 -; X86-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 -; X86-SSE42-NEXT: psrlw $8, %xmm0 -; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 +; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE42-NEXT: psrlw $8, %xmm2 +; X86-SSE42-NEXT: pminub %xmm0, %xmm2 +; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 +; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax ; X86-SSE42-NEXT: retl @@ -1766,14 +1737,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { ; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax ; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax ; X86-AVX1-NEXT: vzeroupper @@ -1783,15 +1752,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { ; X86-AVX2: ## %bb.0: ; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax ; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax ; X86-AVX2-NEXT: vzeroupper @@ -1849,16 +1816,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { ; X64-SSE42-NEXT: pminsb %xmm3, %xmm1 ; X64-SSE42-NEXT: pminsb %xmm2, %xmm0 ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X64-SSE42-NEXT: psrld $16, %xmm1 -; X64-SSE42-NEXT: pminsb %xmm0, %xmm1 -; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 -; X64-SSE42-NEXT: psrlw $8, %xmm0 -; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 +; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE42-NEXT: psrlw $8, %xmm2 +; X64-SSE42-NEXT: pminub %xmm0, %xmm2 +; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 +; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax ; X64-SSE42-NEXT: retq @@ -1870,14 +1834,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { ; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax ; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax ; X64-AVX1-NEXT: vzeroupper @@ -1887,15 +1849,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 +; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax ; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax ; X64-AVX2-NEXT: vzeroupper @@ -1904,17 +1864,15 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { ; X64-AVX512-LABEL: test_reduce_v64i8: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0 +; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0 -; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0 -; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0 -; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0 -; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 -; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0 +; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 +; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax ; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax ; X64-AVX512-NEXT: vzeroupper |