about | summary | refs | log | tree | commit | diff
path: root/test/CodeGen/X86/avx512-insert-extract.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx512-insert-extract.ll')
-rw-r--r-- test/CodeGen/X86/avx512-insert-extract.ll 163
1 files changed, 55 insertions, 108 deletions
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index 7e0b981b2c6a..7477e05f0c7f 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -793,11 +793,10 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k1
-; KNL-NEXT: movl {{.*}}(%rip), %ecx
-; KNL-NEXT: vpbroadcastd %ecx, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k1
-; KNL-NEXT: vpbroadcastd %ecx, %zmm1 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
@@ -1432,8 +1431,7 @@ define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $15, %edi
-; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movb (%rdi,%rax), %al
+; CHECK-NEXT: movb -24(%rsp,%rdi), %al
; CHECK-NEXT: retq
%t2 = extractelement <16 x i8> %t1, i32 %index
ret i8 %t2
@@ -1452,8 +1450,7 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $31, %edi
-; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: movb (%rdi,%rax), %al
+; CHECK-NEXT: movb (%rsp,%rdi), %al
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
@@ -1477,8 +1474,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: andl $63, %edi
-; KNL-NEXT: movq %rsp, %rax
-; KNL-NEXT: movb (%rdi,%rax), %al
+; KNL-NEXT: movb (%rsp,%rdi), %al
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
@@ -1496,8 +1492,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: andl $63, %edi
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: movb (%rdi,%rax), %al
+; SKX-NEXT: movb (%rsp,%rdi), %al
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
@@ -1522,8 +1517,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: movzbl %dil, %eax
; KNL-NEXT: andl $63, %eax
-; KNL-NEXT: movq %rsp, %rcx
-; KNL-NEXT: movb (%rax,%rcx), %al
+; KNL-NEXT: movb (%rsp,%rax), %al
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
@@ -1542,8 +1536,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: movzbl %dil, %eax
; SKX-NEXT: andl $63, %eax
-; SKX-NEXT: movq %rsp, %rcx
-; SKX-NEXT: movb (%rax,%rcx), %al
+; SKX-NEXT: movb (%rsp,%rax), %al
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
@@ -1617,45 +1610,28 @@ define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b,
define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v8i1:
; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: andq $-64, %rsp
-; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $7, %edi
-; KNL-NEXT: movzbl (%rsp,%rdi,8), %eax
+; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movq %rbp, %rsp
-; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v8i1:
; SKX: ## %bb.0:
-; SKX-NEXT: pushq %rbp
-; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: .cfi_offset %rbp, -16
-; SKX-NEXT: movq %rsp, %rbp
-; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleud %ymm1, %ymm0, %k0
-; SKX-NEXT: vpmovm2q %k0, %zmm0
-; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $7, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,8), %eax
+; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: movq %rbp, %rsp
-; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%t1 = icmp ugt <8 x i32> %a, %b
@@ -1667,43 +1643,26 @@ define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b,
define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v16i1:
; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: andq $-64, %rsp
-; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa32 %zmm0, (%rsp)
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $15, %edi
-; KNL-NEXT: movzbl (%rsp,%rdi,4), %eax
+; KNL-NEXT: movzbl -24(%rsp,%rdi), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movq %rbp, %rsp
-; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v16i1:
; SKX: ## %bb.0:
-; SKX-NEXT: pushq %rbp
-; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: .cfi_offset %rbp, -16
-; SKX-NEXT: movq %rsp, %rbp
-; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
-; SKX-NEXT: vpmovm2d %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $15, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,4), %eax
+; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: movq %rbp, %rsp
-; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%t1 = icmp ugt <16 x i32> %a, %b
@@ -1729,8 +1688,7 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vmovdqa %ymm0, (%rsp)
; KNL-NEXT: andl $31, %edi
-; KNL-NEXT: movq %rsp, %rax
-; KNL-NEXT: movzbl (%rdi,%rax), %eax
+; KNL-NEXT: movzbl (%rsp,%rdi), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
@@ -1744,14 +1702,14 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
+; SKX-NEXT: andq $-32, %rsp
+; SKX-NEXT: subq $64, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
-; SKX-NEXT: vpmovm2w %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vmovdqa %ymm0, (%rsp)
; SKX-NEXT: andl $31, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,2), %eax
+; SKX-NEXT: movzbl (%rsp,%rdi), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
@@ -1792,8 +1750,7 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: andl $31, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 32(%rsp,%rsi)
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -1817,20 +1774,18 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
+; SKX-NEXT: andq $-32, %rsp
+; SKX-NEXT: subq $64, %rsp
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k1
-; SKX-NEXT: xorl %eax, %eax
-; SKX-NEXT: testb %dil, %dil
-; SKX-NEXT: setne %al
-; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT: andl $31, %esi
-; SKX-NEXT: movw %ax, (%rsp,%rsi,2)
-; SKX-NEXT: vpsllw $15, (%rsp), %zmm0
-; SKX-NEXT: vpmovw2m %zmm0, %k0
+; SKX-NEXT: testb %dil, %dil
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vmovdqa %ymm0, (%rsp)
+; SKX-NEXT: setne (%rsp,%rsi)
+; SKX-NEXT: vpsllw $7, (%rsp), %ymm0
+; SKX-NEXT: vpmovb2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
@@ -1863,8 +1818,7 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 64(%rsp,%rsi)
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
@@ -1905,13 +1859,12 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k1
+; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT: andl $63, %esi
; SKX-NEXT: testb %dil, %dil
-; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: setne (%rsp,%rsi)
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: kmovq %k0, %rax
@@ -2050,8 +2003,7 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; KNL-NEXT: setne (%rax,%rcx)
+; KNL-NEXT: setne 128(%rsp,%rax)
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
@@ -2215,18 +2167,16 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
; SKX-NEXT: vpinsrb $15, 728(%rbp), %xmm2, %xmm2
; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k1
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k2
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
; SKX-NEXT: movl 744(%rbp), %eax
; SKX-NEXT: andl $127, %eax
; SKX-NEXT: cmpb $0, 736(%rbp)
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm1 {%k2} {z}
-; SKX-NEXT: vmovdqa32 %zmm1, {{[0-9]+}}(%rsp)
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpmovm2b %k1, %zmm0
+; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
+; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rcx
-; SKX-NEXT: setne (%rax,%rcx)
+; SKX-NEXT: setne (%rsp,%rax)
; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
@@ -2270,8 +2220,7 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 128(%rsp,%rsi)
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
@@ -2336,17 +2285,15 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
; SKX-NEXT: subq $256, %rsp ## imm = 0x100
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k1
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k2
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
; SKX-NEXT: andl $127, %esi
; SKX-NEXT: testb %dil, %dil
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm1 {%k2} {z}
-; SKX-NEXT: vmovdqa32 %zmm1, {{[0-9]+}}(%rsp)
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpmovm2b %k1, %zmm0
+; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
+; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: setne (%rsp,%rsi)
; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0