Diffstat (limited to 'test/CodeGen/X86/avx512vl-vec-masked-cmp.ll')
 test/CodeGen/X86/avx512vl-vec-masked-cmp.ll | 50906 +++++++++++++++++++------
 1 file changed, 46473 insertions(+), 4433 deletions(-)
diff --git a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
index f297fc3db95f..4d3a1495617e 100644
--- a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
+++ b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
@@ -1,13 +1,124 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=NoVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi0:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi2:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi3:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi4:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi5:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi6:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi7:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -18,11 +129,122 @@ entry:
}
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi8:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi9:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi10:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi11:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi12:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi13:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi14:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi15:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -34,12 +256,124 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi16:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi17:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi18:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi19:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi20:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi21:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi22:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi23:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -52,12 +386,124 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi24:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi25:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi26:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi27:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi28:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi29:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi30:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi31:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -72,11 +518,127 @@ entry:
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi32:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi33:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi34:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi35:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi36:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi37:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi38:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi39:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -87,11 +649,127 @@ entry:
}
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi40:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi41:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi42:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi43:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi44:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi45:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi46:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi47:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -103,12 +781,129 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi48:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi49:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi50:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi51:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi52:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi53:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi54:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi55:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -121,12 +916,129 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi56:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi57:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi58:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi59:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi60:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi61:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi62:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi63:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -141,12 +1053,46 @@ entry:
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi64:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi65:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi66:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -157,12 +1103,46 @@ entry:
}
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi67:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi68:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi69:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -174,13 +1154,56 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi70:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi71:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi72:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -193,13 +1216,56 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi73:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi74:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi75:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -214,11 +1280,24 @@ entry:
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -229,11 +1308,24 @@ entry:
}
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -245,12 +1337,26 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -263,12 +1369,26 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -283,11 +1403,72 @@ entry:
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi76:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi77:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi78:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -298,11 +1479,72 @@ entry:
}
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi79:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi80:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi81:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -314,12 +1556,74 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi82:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi83:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi84:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -332,12 +1636,74 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi85:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi86:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi87:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -352,11 +1718,77 @@ entry:
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi88:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi89:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi90:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -367,11 +1799,77 @@ entry:
}
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi91:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi92:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi93:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -383,12 +1881,79 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi94:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi95:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi96:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -401,12 +1966,79 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi97:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi98:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi99:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -421,12 +2053,123 @@ entry:
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi100:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi101:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi102:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi103:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi104:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi105:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi106:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi107:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -437,12 +2180,123 @@ entry:
}
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi108:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi109:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi110:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi111:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi112:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi113:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi114:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi115:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -454,13 +2308,125 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi116:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi117:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi118:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi119:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi120:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi121:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi122:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi123:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -473,13 +2439,125 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi124:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi125:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi126:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi127:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi128:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi129:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi130:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi131:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -494,12 +2572,128 @@ entry:
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi132:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi133:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi134:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi135:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi136:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi137:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi138:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi139:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -510,12 +2704,128 @@ entry:
}
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi140:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi141:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi142:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi143:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi144:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi145:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi146:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi147:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -527,13 +2837,130 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi148:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi149:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi150:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi151:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi152:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi153:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi154:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi155:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -546,13 +2973,130 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi156:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi157:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi158:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi159:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi160:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi161:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi162:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi163:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -567,12 +3111,348 @@ entry:
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi164:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi165:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi166:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -583,12 +3463,263 @@ entry:
}
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi167:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi168:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi169:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -600,13 +3731,358 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi170:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi171:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi172:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm4
+; NoVLX-NEXT: vpmovdb %zmm6, %xmm6
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm8, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
+; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -619,13 +4095,273 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi173:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi174:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi175:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm4, %ymm4
+; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4
+; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -640,11 +4376,51 @@ entry:
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -655,11 +4431,51 @@ entry:
}
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -671,12 +4487,70 @@ entry:
}
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -690,12 +4564,70 @@ entry:
}
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -711,11 +4643,52 @@ entry:
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -728,12 +4701,71 @@ entry:
}
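The _mask_mem_b variants compare against a single i32 splatted from memory, which SKX folds into the compare as a {1to4} embedded broadcast while KNL must first materialize the splat with an explicit vpbroadcastd. A sketch of that splat-and-compare IR shape (again with hypothetical names):

define zeroext i8 @sketch_v4i1_v8i1_b(<2 x i64> %a, i32* %b) {
entry:
  ; Sketch only: splat the scalar load across four lanes, compare, and
  ; widen the <4 x i1> result with zeros into the i8 mask type.
  %va = bitcast <2 x i64> %a to <4 x i32>
  %s = load i32, i32* %b
  %ins = insertelement <4 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %cmp = icmp eq <4 x i32> %va, %splat
  %wide = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = bitcast <8 x i1> %wide to i8
  ret i8 %res
}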
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -750,11 +4782,50 @@ entry:
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -765,11 +4836,50 @@ entry:
}
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -781,12 +4891,69 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -800,12 +4967,69 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -821,11 +5045,51 @@ entry:
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -838,12 +5102,70 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -860,11 +5182,39 @@ entry:
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi176:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi177:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi178:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -875,11 +5225,39 @@ entry:
}
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi179:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi180:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi181:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -891,12 +5269,58 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi182:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi183:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi184:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -910,12 +5334,58 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi185:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi186:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi187:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -931,11 +5401,40 @@ entry:
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi188:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi189:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi190:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -948,12 +5447,59 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi191:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi192:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi193:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -970,11 +5516,46 @@ entry:
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi194:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi195:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi196:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -985,11 +5566,46 @@ entry:
}
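For the v64i1 widenings in this stretch, SKX reads the result with a single kmovq, but KNL has no 64-bit mask moves: it spills the 16-bit mask words through a 32-byte-aligned stack slot (hence the rbp frame setup and andq $-32, %rsp), reloads two 32-bit halves, and reassembles the i64 with shlq $32 plus orq. The IR being lowered zero-extends the four compare bits into an i64; below is a sketch using a bitcast-plus-zext form that is equivalent in effect to the shufflevector-against-zeroinitializer widening these tests actually use (names hypothetical):

define zeroext i64 @sketch_v4i1_v64i1(<2 x i64> %a, <2 x i64> %b) {
entry:
  ; Sketch only: pack the four compare bits into an i4 and zero-extend.
  %va = bitcast <2 x i64> %a to <4 x i32>
  %vb = bitcast <2 x i64> %b to <4 x i32>
  %cmp = icmp eq <4 x i32> %va, %vb
  %m4 = bitcast <4 x i1> %cmp to i4
  %m64 = zext i4 %m4 to i64
  ret i64 %m64
}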
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi197:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi198:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi199:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -1001,12 +5617,65 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi200:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi201:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi202:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -1020,12 +5689,65 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi203:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi204:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi205:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -1041,11 +5763,47 @@ entry:
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi206:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi207:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi208:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -1058,12 +5816,66 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi209:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi210:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi211:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -1080,21 +5892,23 @@ entry:
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -1106,21 +5920,23 @@ entry:
}
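In the v8i1 tests above and below, the inputs are 256-bit and KNL gets a much cheaper lowering: it implicitly widens the ymm operands to zmm (the kill comments), performs the compare as a 16-lane vpcmpeqd, and clears the upper eight mask bits with a kshiftlw/kshiftrw pair. A sketch of the corresponding IR shape (hypothetical names):

define zeroext i16 @sketch_v8i1_v16i1(<4 x i64> %a, <4 x i64> %b) {
entry:
  ; Sketch only: an eight-lane compare zero-extended into the i16 mask type.
  %va = bitcast <4 x i64> %a to <8 x i32>
  %vb = bitcast <4 x i64> %b to <8 x i32>
  %cmp = icmp eq <8 x i32> %va, %vb
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %res = bitcast <16 x i1> %wide to i16
  ret i16 %res
}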
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -1133,23 +5949,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -1163,23 +5981,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -1195,21 +6015,23 @@ entry:
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -1223,23 +6045,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -1256,12 +6080,72 @@ entry:
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi212:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi213:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi214:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -1272,12 +6156,72 @@ entry:
}
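; NOTE: in the NoVLX (KNL) lowering of these v8i1 tests, the ymm operands are
; widened to zmm, each of the eight mask bits is extracted with a
; kshiftlw/kshiftrw pair and reassembled in an xmm via vpinsrb, and the vector
; is re-tested with vptestmd so the mask can be stored beside a zeroed word
; and reloaded as a zero-extended i32.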
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi215:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi216:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi217:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -1289,13 +6233,75 @@ entry:
}
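; NOTE: for the i8-masked v8i1 tests that follow, NoVLX moves %__u into a
; k-register with kmovw and applies it with kandw before the bit-by-bit
; extraction above is repeated.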
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi218:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi219:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi220:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -1308,13 +6314,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi221:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi222:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi223:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -1329,12 +6397,72 @@ entry:
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi224:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi225:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi226:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -1347,13 +6475,75 @@ entry:
}
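; NOTE: in the broadcast-form (_mem_b) tests, NoVLX has no {1to8} embedded
; broadcast at ymm width, so the scalar is splatted with vpbroadcastd and the
; compare is still performed at zmm width.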
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi227:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi228:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi229:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -1369,12 +6559,77 @@ entry:
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi230:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi231:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi232:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -1385,12 +6640,77 @@ entry:
}
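; NOTE: the v64i1 variants reserve a 64-byte slot, zero its upper words with
; three extra kmovw stores, and splice the final i64 out of two 32-bit halves
; with shlq $32 and orq.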
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi233:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi234:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi235:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -1402,13 +6722,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi236:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi237:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi238:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -1421,13 +6808,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi239:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi240:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi241:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -1442,12 +6896,77 @@ entry:
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi242:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi243:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi244:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -1460,13 +6979,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi245:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi246:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi247:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -1482,12 +7068,120 @@ entry:
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi248:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi249:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi250:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi251:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi252:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi253:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi254:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi255:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -1498,12 +7192,120 @@ entry:
}
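; NOTE: the v16i1 tests extract all sixteen mask bits, so the NoVLX lowering
; also spills rbx and r12-r15 (hence the extra .Lcfi offsets) to get enough
; scratch GPRs for the vpinsrb chain.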
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi256:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi257:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi258:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi259:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi260:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi261:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi262:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi263:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -1515,13 +7317,122 @@ entry:
}
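; NOTE: with an i16 mask covering all sixteen lanes, NoVLX can apply %__u
; directly as a compare predicate ({%k1}) rather than through the kandw used
; in the i8-masked v8i1 tests.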
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi264:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi265:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi266:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi267:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi268:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi269:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi270:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi271:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -1534,13 +7445,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi272:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi273:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi274:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi275:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi276:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi277:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi278:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi279:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -1555,12 +7575,120 @@ entry:
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi280:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi281:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi282:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi283:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi284:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi285:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi286:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi287:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -1573,13 +7701,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi288:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi289:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi290:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi291:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi292:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi293:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi294:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi295:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -1595,12 +7832,125 @@ entry:
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi296:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi297:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi298:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi299:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi300:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi301:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi302:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi303:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -1611,12 +7961,125 @@ entry:
}
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi304:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi305:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi306:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi307:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi308:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi309:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi310:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi311:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -1628,13 +8091,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi312:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi313:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi314:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi315:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi316:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi317:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi318:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi319:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -1647,13 +8224,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi320:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi321:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi322:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi323:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi324:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi325:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi326:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi327:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -1668,12 +8359,125 @@ entry:
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi328:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi329:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi330:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi331:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi332:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi333:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi334:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi335:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -1686,13 +8490,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi336:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi337:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi338:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi339:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi340:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi341:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi342:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi343:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -1708,12 +8626,23 @@ entry:
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -1724,12 +8653,23 @@ entry:
}
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -1741,13 +8681,34 @@ entry:
}
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -1761,13 +8722,34 @@ entry:
}
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -1783,12 +8765,24 @@ entry:
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -1801,13 +8795,35 @@ entry:
}
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -1824,11 +8840,35 @@ entry:
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -1839,11 +8879,35 @@ entry:
}
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -1855,12 +8919,46 @@ entry:
}
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -1874,12 +8972,46 @@ entry:
}
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -1895,11 +9027,36 @@ entry:
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -1912,12 +9069,47 @@ entry:
}
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -1934,11 +9126,34 @@ entry:
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -1949,11 +9164,34 @@ entry:
}
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -1965,12 +9203,45 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -1984,12 +9255,45 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -2005,11 +9309,35 @@ entry:
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -2022,12 +9350,46 @@ entry:
}
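; Note on the _mask_mem_b variants: VLX folds the scalar load as an embedded
; broadcast, (%rdi){1to2} or (%rsi){1to2}, directly into the masked compare,
; while the NoVLX lowering must materialize the broadcast first with
; vpbroadcastq and then run an ordinary vector compare.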
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -2044,11 +9406,39 @@ entry:
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi344:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi345:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi346:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -2059,11 +9449,39 @@ entry:
}
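; For the i32-returning v2i1_v32i1 cases the NoVLX path goes through memory:
; the rbp frame and andq $-32 exist only to carve out an aligned 32-byte slot,
; two 16-bit k-masks are spilled there with kmovw, and the i32 result is read
; back with a single movl from (%rsp).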
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi347:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi348:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi349:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -2075,12 +9493,50 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi350:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi351:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi352:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -2094,12 +9550,50 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi353:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi354:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi355:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -2115,11 +9609,40 @@ entry:
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi356:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi357:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi358:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -2132,12 +9655,51 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi359:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi360:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi361:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -2154,11 +9716,46 @@ entry:
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi362:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi363:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi364:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -2169,11 +9766,46 @@ entry:
}
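; The i64-returning variants extend the same spill scheme: a zeroed k-mask
; appears to be stored twice to pad the unused 16-bit slots, the two live
; masks fill the remaining slots, and the two 32-bit halves are reassembled
; in GPRs with shlq $32 plus orq before returning in %rax.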
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi365:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi366:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi367:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -2185,12 +9817,57 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi368:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi369:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi370:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -2204,12 +9881,57 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi371:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi372:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi373:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -2225,11 +9947,47 @@ entry:
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi374:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi375:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi376:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -2242,12 +10000,58 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi377:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi378:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi379:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -2264,12 +10068,53 @@ entry:
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2280,12 +10125,53 @@ entry:
}
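; In the v4i1_v8i1 widening, KNL builds the 8-bit mask one lane at a time:
; each compare bit extracted by vpextrb becomes an all-ones qword lane via
; vpternlogq {%k1} {z}, and the vpermi2q index tables [0,8,2,...], [0,1,8,...]
; and [0,1,2,8,...] splice lane after lane into the accumulating vector before
; vpsllq+vptestmq turns it back into a k-register.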
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2297,13 +10183,72 @@ entry:
}
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2317,13 +10262,72 @@ entry:
}
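; When a caller mask %__u is present for the v4i1 cases, NoVLX decomposes the
; incoming i8: four kshiftlw/kshiftrw pairs isolate bits 0-3 into GPRs, which
; vmovd/vpinsrb reassemble as one bit per dword lane so that a plain vpand can
; apply the mask to the compare result.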
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2339,12 +10343,54 @@ entry:
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2357,13 +10403,73 @@ entry:
}
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2380,12 +10486,52 @@ entry:
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2396,12 +10542,52 @@ entry:
}
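; The v4i1_v16i1 widening shown above reuses the v8i1 insertion scheme at
; dword granularity: vpermi2d with index tables [0,16,...], [0,1,16,...] and
; [0,1,2,16,...] in place of vpermi2q, and vpslld+vptestmd for the final mask.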
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2413,13 +10599,71 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2433,13 +10677,71 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2455,12 +10757,53 @@ entry:
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2473,13 +10816,72 @@ entry:
}
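; NOTE: the masked NoVLX variants expand the i8 %__u argument in vector code:
; kmovw moves it into k0, kshiftlw/kshiftrw pairs isolate bits 0-3, vmovd
; seeds dword lane 0 and vpinsrb scatters the remaining bits to byte offsets
; 4/8/12, so a single vpand applies the mask to the compare result. Exact
; register assignment in the checks is scheduling-dependent.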
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2496,12 +10898,41 @@ entry:
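; NOTE: for i32-wide results the NoVLX path goes through the stack: the frame
; is realigned to 32 bytes, vpshufb packs the four lanes into bytes 0-3 and
; zeroes the rest, both 16-byte halves are sign-extended and tested into k
; registers, and the two 16-bit mask words are spilled and reloaded together
; by one movl. The summary merely paraphrases the CHECK lines below.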
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi380:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi381:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi382:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2512,12 +10943,41 @@ entry:
}
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi383:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi384:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi385:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2529,13 +10989,60 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi386:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi387:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi388:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2549,13 +11056,60 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi389:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi390:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi391:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2571,12 +11125,42 @@ entry:
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi392:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi393:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi394:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2589,13 +11173,61 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi395:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi396:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi397:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2612,12 +11244,48 @@ entry:
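; NOTE: the i64-wide variants extend the same spill idiom: a zeroed zmm is
; tested to produce all-zero words for the upper half, the two 32-bit halves
; are reloaded separately, and shlq $32 plus orq stitch them into rax.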
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi398:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi399:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi400:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2628,12 +11296,48 @@ entry:
}
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi401:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi402:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi403:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2645,13 +11349,67 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi404:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi405:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi406:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -2665,13 +11423,67 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi407:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi408:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi409:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -2687,12 +11499,49 @@ entry:
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi410:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi411:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi412:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2705,13 +11554,68 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi413:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi414:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi415:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -2728,12 +11632,20 @@ entry:
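; NOTE: from this point the inputs are 512-bit, so plain AVX512F covers the
; compare itself: the NoVLX runs keep vpcmpeqq %zmm1, %zmm0, %k0 and differ
; from VLX only in the width of the kmov and in how the i1 result vector is
; widened for the return value.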
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -2744,12 +11656,20 @@ entry:
}
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -2761,13 +11681,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -2780,13 +11709,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -2801,12 +11739,20 @@ entry:
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -2819,13 +11765,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -2841,12 +11796,70 @@ entry:
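; NOTE: widening a v8i1 k-register to i32 on NoVLX is done bit by bit in the
; checks below: eight kshiftlw/kshiftrw pairs expose one mask bit each,
; vpinsrb packs them into the low 8 bytes of an xmm, and vpmovsxbd, vpslld
; $31 and vptestmd rebuild a mask word that is spilled next to a zero word
; and reloaded as the i32 return value.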
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi416:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi417:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi418:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -2857,12 +11870,70 @@ entry:
}
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi419:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi420:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi421:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -2874,13 +11945,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi422:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi423:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi424:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -2893,13 +12023,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi425:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi426:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi427:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -2914,12 +12103,70 @@ entry:
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi428:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi429:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi430:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -2932,13 +12179,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi431:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi432:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi433:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -2954,12 +12260,75 @@ entry:
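; NOTE: the v8i1-to-i64 widenings reuse the bitwise expansion above, spilling
; three zero words beside the result word so two movl reloads plus shlq $32
; and orq can assemble the 64-bit return value; as elsewhere, this describes
; the autogenerated output rather than prescribing it.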
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi434:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi435:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi436:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -2970,12 +12339,75 @@ entry:
}
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi437:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi438:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi439:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -2987,13 +12419,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi440:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi441:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi442:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -3006,13 +12502,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi443:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi444:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi445:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -3027,12 +12587,75 @@ entry:
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi446:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi447:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi448:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -3045,13 +12668,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi449:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi450:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi451:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -3067,11 +12754,122 @@ entry:
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi452:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi453:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi454:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi455:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi456:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi457:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi458:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi459:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -3082,11 +12880,122 @@ entry:
}
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi460:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi461:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi462:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi463:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi464:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi465:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi466:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi467:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3098,12 +13007,124 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi468:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi469:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi470:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi471:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi472:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi473:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi474:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi475:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -3116,12 +13137,124 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi476:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi477:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi478:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi479:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi480:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi481:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi482:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi483:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3136,11 +13269,127 @@ entry:
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi484:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi485:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi486:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi487:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi488:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi489:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi490:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi491:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -3151,11 +13400,127 @@ entry:
}
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi492:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi493:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi494:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi495:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi496:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi497:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi498:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi499:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3167,12 +13532,129 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi500:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi501:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi502:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi503:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi504:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi505:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi506:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi507:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -3185,12 +13667,129 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi508:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi509:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi510:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi511:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi512:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi513:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi514:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi515:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3205,12 +13804,46 @@ entry:
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi516:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi517:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi518:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -3221,12 +13854,46 @@ entry:
}
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi519:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi520:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi521:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -3238,13 +13905,56 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi522:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi523:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi524:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
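+; In the masked NoVLX form the i32 mask argument is round-tripped through
+; memory: each 16-bit half is loaded into a k-register, expanded to all-ones
+; dwords with vpternlogd {z}, narrowed back to bytes with vpmovdb, and and'ed
+; onto the matching half of the compare result before the mask words are
+; rebuilt on the stack as above.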
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -3257,13 +13967,56 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi525:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi526:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi527:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -3278,11 +14031,24 @@ entry:
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
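+; When only 8 mask bits are live and the result fits in i16, NoVLX stays in
+; k-registers: kxorw produces a zero mask and kunpckbw concatenates it above
+; the compare mask, so bits 8-15 are clear before the kmovw; the "kill" note
+; records that the i16 value is returned in the low half of %eax.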
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -3293,11 +14059,24 @@ entry:
}
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3309,12 +14088,26 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -3327,12 +14120,26 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3347,11 +14154,72 @@ entry:
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi528:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi529:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi530:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
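+; Widening v8i1 to i32 on NoVLX is done bit by bit: each kshiftlw/kshiftrw
+; pair isolates one mask bit into a GPR, vpinsrb rebuilds the bits as bytes in
+; a zeroed xmm, and the re-tested mask is spilled next to an all-zero mask
+; word so the 32-bit reload comes back zero-extended.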
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -3362,11 +14230,72 @@ entry:
}
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi531:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi532:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi533:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3378,12 +14307,74 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi534:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi535:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi536:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -3396,12 +14387,74 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi537:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi538:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi539:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3416,11 +14469,77 @@ entry:
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi540:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi541:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi542:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
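+; The v8i1 -> i64 variants use the same bit-by-bit rebuild, but store the
+; all-zero mask word three times to clear the stack words above the result
+; before the movl/shlq/orq sequence assembles the 64-bit return value.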
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -3431,11 +14550,77 @@ entry:
}
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi543:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi544:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi545:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3447,12 +14632,79 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi546:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi547:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi548:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -3465,12 +14717,79 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi549:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi550:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi551:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3485,12 +14804,123 @@ entry:
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi552:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi553:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi554:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi555:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi556:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi557:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi558:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi559:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
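+; Extracting all 16 bits of a v16i1 mask needs more scratch GPRs than the
+; caller-saved set provides, so these NoVLX blocks save %rbx and %r12-%r15 up
+; front (hence the extra pushes and .cfi_offset directives) and restore them
+; through the leaq -40(%rbp) epilogue.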
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -3501,12 +14931,123 @@ entry:
}
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi560:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi561:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi562:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi563:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi564:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi565:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi566:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi567:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -3518,13 +15059,125 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi568:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi569:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi570:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi571:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi572:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi573:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi574:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi575:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -3537,13 +15190,125 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi576:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi577:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi578:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi579:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi580:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi581:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi582:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi583:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -3558,12 +15323,128 @@ entry:
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi584:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi585:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi586:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi587:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi588:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi589:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi590:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi591:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
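+; v16i1 -> i64 combines the two patterns above: the 16-bit bitwise rebuild of
+; the mask, zeroed upper stack words, and the shlq/orq assembly of the final
+; i64 return value.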
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -3574,12 +15455,128 @@ entry:
}
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi592:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi593:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi594:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi595:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi596:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi597:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi598:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi599:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -3591,13 +15588,130 @@ entry:
}
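; The masked variants differ only in how the incoming i16 mask %__u is
; applied: the VLX path uses it directly as a write-mask on the compare,
; while the NoVLX path moves %edi into %k1 with kmovw and applies it as a
; write-mask on the vptestmd that regenerates the compare result ({%k1}),
; zeroing the masked-off lanes.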
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi600:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi601:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi602:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi603:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi604:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi605:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi606:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi607:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -3610,13 +15724,130 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi608:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi609:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi610:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi611:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi612:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi613:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi614:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi615:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -3631,12 +15862,348 @@ entry:
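; KNL has no AVX-512BW, so for the v32i1 cases below there is neither a
; 512-bit vpcmpgtw nor a mask move wider than 16 bits. The NoVLX lowering
; therefore extracts 128-bit quarters with vextracti32x4, rebuilds 256-bit
; halves by bouncing each 64-bit lane through GPRs (vmovq/vpextrq, shifts,
; vpinsrw), runs two 256-bit vpcmpgtw compares, spills each 16-bit result
; mask with kmovw, and fuses the two 32-bit halves with shlq $32 / orq.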
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi616:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi617:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi618:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -3647,12 +16214,263 @@ entry:
}
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi619:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi620:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi621:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -3664,13 +16482,358 @@ entry:
}
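; For the masked v32i1 forms the i32 mask is spilled to the stack and
; reloaded 16 bits at a time into %k1 and %k2; vpternlogd $255 {%kN} {z}
; materializes each half as all-ones dwords, vpmovdb narrows those to byte
; vectors, and vpand applies them to the two compare results before the
; final vptestmd/kmovw spill.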
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi622:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi623:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi624:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm4
+; NoVLX-NEXT: vpmovdb %zmm6, %xmm6
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm8, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
+; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -3683,13 +16846,273 @@ entry:
}
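; The _mask_mem variant below is the same lowering with one operand taken
; straight from memory: the two halves compare against (%rsi) and 32(%rsi)
; instead of a second register pair rebuilt from %zmm1.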
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi625:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi626:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi627:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm4, %ymm4
+; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4
+; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -3704,11 +17127,51 @@ entry:
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -3719,11 +17182,51 @@ entry:
}
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3735,12 +17238,70 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -3754,12 +17315,70 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3775,11 +17394,52 @@ entry:
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -3792,12 +17452,71 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -3814,11 +17533,50 @@ entry:
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -3829,11 +17587,50 @@ entry:
}
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3845,12 +17642,69 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -3864,12 +17718,69 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3885,11 +17796,51 @@ entry:
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -3902,12 +17853,70 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -3924,11 +17933,39 @@ entry:
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi628:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi629:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi630:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -3939,11 +17976,39 @@ entry:
}
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi631:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi632:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi633:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3955,12 +18020,58 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi634:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi635:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi636:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -3974,12 +18085,58 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi637:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi638:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi639:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -3995,11 +18152,40 @@ entry:
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi640:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi641:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi642:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -4012,12 +18198,59 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi643:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi644:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi645:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -4034,11 +18267,46 @@ entry:
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi646:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi647:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi648:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -4049,11 +18317,46 @@ entry:
}
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi649:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi650:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi651:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -4065,12 +18368,65 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi652:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi653:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi654:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -4084,12 +18440,65 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi655:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi656:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi657:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -4105,11 +18514,47 @@ entry:
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi658:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi659:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi660:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -4122,12 +18567,66 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi661:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi662:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi663:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -4144,21 +18643,23 @@ entry:
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -4170,21 +18671,23 @@ entry:
}
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -4197,23 +18700,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -4227,23 +18732,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -4259,21 +18766,23 @@ entry:
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -4287,23 +18796,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -4320,12 +18831,72 @@ entry:
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi664:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi665:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi666:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -4336,12 +18907,72 @@ entry:
}
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi667:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi668:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi669:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -4353,13 +18984,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi670:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi671:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi672:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -4372,13 +19065,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi673:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi674:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi675:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -4393,12 +19148,72 @@ entry:
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi676:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi677:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi678:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -4411,13 +19226,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi679:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi680:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi681:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -4433,12 +19310,77 @@ entry:
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi682:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi683:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi684:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -4449,12 +19391,77 @@ entry:
}
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi685:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi686:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi687:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -4466,13 +19473,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi688:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi689:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi690:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -4485,13 +19559,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi691:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi692:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi693:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -4506,12 +19647,77 @@ entry:
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi694:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi695:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi696:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -4524,13 +19730,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi697:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi698:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi699:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -4546,12 +19819,120 @@ entry:
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi700:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi701:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi702:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi703:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi704:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi705:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi706:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi707:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -4562,12 +19943,120 @@ entry:
}
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi708:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi709:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi710:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi711:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi712:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi713:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi714:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi715:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -4579,13 +20068,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi716:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi717:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi718:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi719:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi720:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi721:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi722:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi723:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -4598,13 +20196,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi724:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi725:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi726:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi727:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi728:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi729:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi730:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi731:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -4619,12 +20326,120 @@ entry:
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi732:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi733:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi734:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi735:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi736:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi737:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi738:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi739:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -4637,13 +20452,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi740:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi741:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi742:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi743:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi744:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi745:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi746:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi747:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -4659,12 +20583,125 @@ entry:
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi748:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi749:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi750:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi751:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi752:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi753:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi754:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi755:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -4675,12 +20712,125 @@ entry:
}
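; The v16i1-to-v64i1 variants differ from the v32i1 ones above only in how
; the 64-bit return is assembled: three extra zeroed mask words are spilled
; (the repeated kmovw %k1 stores) to clear bits 16-63 of the stack slot, and
; since KNL lacks the 64-bit kmovq mask move, the result is rebuilt from two
; 32-bit loads combined with shlq $32 / orq.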
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi756:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi757:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi758:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi759:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi760:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi761:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi762:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi763:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -4692,13 +20842,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi764:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi765:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi766:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi767:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi768:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi769:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi770:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi771:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -4711,13 +20975,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi772:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi773:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi774:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi775:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi776:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi777:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi778:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi779:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -4732,12 +21110,125 @@ entry:
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi780:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi781:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi782:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi783:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi784:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi785:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi786:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi787:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -4750,13 +21241,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi788:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi789:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi790:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi791:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi792:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi793:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi794:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi795:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -4772,12 +21377,23 @@ entry:
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -4788,12 +21404,23 @@ entry:
}
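; For the v2i1-to-v4i1 tests the NoVLX path stays in XMM registers: the two
; 64-bit compare results are narrowed by vinsertps, which packs the low
; dword of each qword lane into lanes 0-1 and zeroes lanes 2-3, then
; vpslld $31 + vptestmd turns the vector back into a mask whose low byte is
; spilled and reloaded with movzbl. The masked forms below first split the
; incoming i8 mask into its two bits with kshiftlw/kshiftrw pairs and apply
; it with vpand before the repack.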
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -4805,13 +21432,34 @@ entry:
}
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -4825,13 +21473,34 @@ entry:
}
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -4847,12 +21516,24 @@ entry:
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -4865,13 +21546,35 @@ entry:
}
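; Note the broadcast (_mem_b) forms: with VLX the load folds into the
; compare as an embedded broadcast ({1to2}/{1to16}), and NoVLX can still do
; that at 512-bit width (the {1to16} cases above), but in the 128-bit case
; it must emit an explicit vpbroadcastq before the vpcmpgtq.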
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -4888,11 +21591,35 @@ entry:
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -4903,11 +21630,35 @@ entry:
}
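; The v2i1-to-v8i1 tests take yet another NoVLX route: each compare bit is
; extracted with vpextrb, rematerialized as an all-ones element of a ZMM via
; vpternlogq $255 under a one-bit mask, and the two registers are merged by
; vpermi2q with the index vector [0,8,2,3,4,5,6,7] before vpsllq $63 +
; vptestmq rebuilds the 8-bit mask; the "kill" annotation marks the i8
; return value living in %al.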
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -4919,12 +21670,46 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -4938,12 +21723,46 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -4959,11 +21778,36 @@ entry:
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -4976,12 +21820,47 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -4998,11 +21877,34 @@ entry:
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -5013,11 +21915,34 @@ entry:
}
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -5029,12 +21954,45 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -5048,12 +22006,45 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -5069,11 +22060,35 @@ entry:
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -5086,12 +22101,46 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -5108,11 +22157,39 @@ entry:
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi796:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi797:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi798:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -5123,11 +22200,39 @@ entry:
}
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi799:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi800:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi801:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -5139,12 +22244,50 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi802:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi803:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi804:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -5158,12 +22301,50 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi805:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi806:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi807:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -5179,11 +22360,40 @@ entry:
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi808:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi809:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi810:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -5196,12 +22406,51 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi811:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi812:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi813:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -5218,11 +22467,46 @@ entry:
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi814:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi815:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi816:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -5233,11 +22517,46 @@ entry:
}
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi817:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi818:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi819:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -5249,12 +22568,57 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi820:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi821:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi822:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -5268,12 +22632,57 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi823:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi824:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi825:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -5289,11 +22698,47 @@ entry:
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi826:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi827:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi828:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -5306,12 +22751,58 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi829:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi830:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi831:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -5328,12 +22819,53 @@ entry:
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5344,12 +22876,53 @@ entry:
}
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5361,13 +22934,72 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5381,13 +23013,72 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5403,12 +23094,54 @@ entry:
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5421,13 +23154,73 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5444,12 +23237,52 @@ entry:
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5460,12 +23293,52 @@ entry:
}
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5477,13 +23350,71 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5497,13 +23428,71 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5519,12 +23508,53 @@ entry:
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5537,13 +23567,72 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5560,12 +23649,41 @@ entry:
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi832:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi833:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi834:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5576,12 +23694,41 @@ entry:
}
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi835:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi836:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi837:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5593,13 +23740,60 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi838:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi839:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi840:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5613,13 +23807,60 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi841:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi842:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi843:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5635,12 +23876,42 @@ entry:
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi844:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi845:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi846:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5653,13 +23924,61 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi847:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi848:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi849:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5676,12 +23995,48 @@ entry:
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi850:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi851:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi852:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5692,12 +24047,48 @@ entry:
}
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi853:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi854:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi855:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5709,13 +24100,67 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi856:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi857:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi858:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -5729,13 +24174,67 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi859:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi860:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi861:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -5751,12 +24250,49 @@ entry:
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi862:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi863:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi864:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5769,13 +24305,68 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi865:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi866:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi867:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -5792,12 +24383,20 @@ entry:
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -5808,12 +24407,20 @@ entry:
}
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -5825,13 +24432,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -5844,13 +24460,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -5865,12 +24490,20 @@ entry:
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -5883,13 +24516,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -5905,12 +24547,70 @@ entry:
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi868:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi869:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi870:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -5921,12 +24621,70 @@ entry:
}
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi871:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi872:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi873:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -5938,13 +24696,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi874:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi875:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi876:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -5957,13 +24774,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi877:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi878:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi879:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -5978,12 +24854,70 @@ entry:
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi880:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi881:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi882:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -5996,13 +24930,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi883:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi884:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi885:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -6018,12 +25011,75 @@ entry:
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi886:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi887:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi888:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -6034,12 +25090,75 @@ entry:
}
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi889:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi890:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi891:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -6051,13 +25170,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi892:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi893:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi894:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -6070,13 +25253,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi895:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi896:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi897:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -6091,12 +25338,75 @@ entry:
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi898:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi899:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi900:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -6109,13 +25419,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi901:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi902:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi903:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -6131,11 +25505,124 @@ entry:
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi904:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi905:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi906:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi907:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi908:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi909:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi910:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi911:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -6146,11 +25633,125 @@ entry:
}
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi912:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi913:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi914:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi915:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi916:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi917:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi918:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi919:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6162,12 +25763,126 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi920:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi921:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi922:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi923:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi924:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi925:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi926:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi927:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -6180,12 +25895,127 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi928:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi929:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi930:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi931:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi932:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi933:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi934:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi935:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6200,11 +26030,129 @@ entry:
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi936:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi937:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi938:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi939:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi940:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi941:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi942:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi943:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -6215,11 +26163,130 @@ entry:
}
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi944:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi945:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi946:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi947:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi948:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi949:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi950:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi951:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6231,12 +26298,131 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi952:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi953:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi954:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi955:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi956:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi957:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi958:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi959:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -6249,12 +26435,132 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi960:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi961:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi962:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi963:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi964:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi965:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi966:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi967:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6269,12 +26575,48 @@ entry:
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi968:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi969:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi970:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -6285,12 +26627,49 @@ entry:
}
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi971:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi972:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi973:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -6302,13 +26681,58 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi974:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi975:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi976:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -6321,13 +26745,59 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi977:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi978:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi979:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm4
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm4, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
+; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -6342,11 +26812,26 @@ entry:
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -6357,11 +26842,27 @@ entry:
}
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6373,12 +26874,28 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -6391,12 +26908,29 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6411,11 +26945,74 @@ entry:
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi980:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi981:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi982:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -6426,11 +27023,75 @@ entry:
}
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi983:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi984:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi985:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6442,12 +27103,76 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi986:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi987:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi988:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -6460,12 +27185,77 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi989:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi990:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi991:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6480,11 +27270,79 @@ entry:
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi992:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi993:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi994:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -6495,11 +27353,80 @@ entry:
}
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi995:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi996:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi997:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6511,12 +27438,81 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi998:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi999:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1000:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -6529,12 +27525,82 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1001:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1002:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1003:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6549,12 +27615,125 @@ entry:
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1004:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1005:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1006:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1007:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1008:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1009:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1010:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1011:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -6565,12 +27744,126 @@ entry:
}
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1012:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1013:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1014:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1015:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1016:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1017:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1018:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1019:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -6582,13 +27875,127 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1020:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1021:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1022:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1023:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1024:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1025:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1026:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1027:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -6601,13 +28008,128 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1028:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1029:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1030:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1031:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1032:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1033:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1034:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1035:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -6622,12 +28144,130 @@ entry:
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1036:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1037:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1038:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1039:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1040:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1041:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1042:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1043:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -6638,12 +28278,131 @@ entry:
}
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1044:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1045:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1046:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1047:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1048:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1049:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1050:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1051:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -6655,13 +28414,132 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1052:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1053:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1054:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1055:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1056:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1057:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1058:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1059:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -6674,13 +28552,133 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1060:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1061:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1062:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1063:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1064:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1065:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1066:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1067:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -6695,12 +28693,351 @@ entry:
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1068:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1069:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1070:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -6711,12 +29048,268 @@ entry:
}
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1071:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1072:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1073:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -6728,13 +29321,361 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1074:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1075:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1076:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm4
+; NoVLX-NEXT: vpmovdb %zmm6, %xmm6
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm8, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
+; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
+; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -6747,13 +29688,278 @@ entry:
}
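; KNL provides neither VLX nor BW, so the masked v32i16 "sge" above cannot be
; a single masked vpcmpw. Instead the 512-bit inputs are rebuilt as two
; 256-bit halves (vmovq/vpextrq plus vpinsrw), compared with the AVX2
; vpcmpgtw in the swapped order b > a, and inverted with vpxor against
; all-ones to yield a >= b. The i32 mask %__u is round-tripped through the
; stack into %k1/%k2, expanded to byte vectors with
; "vpternlogd $255, ... {%k} {z}" plus vpmovdb, and applied with vpand before
; each 16-bit half is spilled and reassembled into the i64 return value.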
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1077:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1078:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1079:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm5
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm5, %ymm3
+; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm5
+; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4
+; NoVLX-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
+; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4
+; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -6768,11 +29974,53 @@ entry:
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -6783,11 +30031,54 @@ entry:
}
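; The VLX run folds the whole operation into one vpcmpd predicate (le with
; swapped operands, or nlt when the right-hand side comes from memory). The
; NoVLX expansion drops to 128-bit compares, which only provide eq and gt, so
; vpcmpgtd with swapped operands computes b > a and the vpxor against
; all-ones complements it into a >= b. Each of the four result lanes is then
; moved through a k-register, materialized as an all-ones element with
; "vpternlogq $255, ... {%k1} {z}", and merged into the widened v8i1 value by
; vpermi2q (index 8 selects element 0 of the second source), before
; vpsllq $63 plus vptestmq convert the vector back into the returned mask.
; The IR being compiled is roughly the following reduced form (a sketch for
; reference, not part of this test):
;
;   %0 = bitcast <2 x i64> %__a to <4 x i32>
;   %1 = bitcast <2 x i64> %__b to <4 x i32>
;   %2 = icmp sge <4 x i32> %0, %1
;   %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer,
;        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
;   %4 = bitcast <8 x i1> %3 to i8
;   ret i8 %4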
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6799,12 +30090,70 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -6818,12 +30167,71 @@ entry:
}
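; In the masked v8i1 variants the i8 mask %__u never reaches a masked
; compare: kmovw moves %edi into %k0, kshiftlw/kshiftrw pairs isolate bits 0
; through 3, and vpinsrb rebuilds them as a vector at byte offsets 0/4/8/12.
; The single "vpandn %xmm1, %xmm0, %xmm0" then computes ~(b > a) & mask,
; folding the vpxor-based complement of the unmasked version and the mask
; application into one instruction.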
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6839,12 +30247,55 @@ entry:
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -6857,13 +30308,72 @@ entry:
}
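; The _mask_mem_b variants compare against a scalar broadcast from memory:
; vpbroadcastd splats the single i32 at (%rdi) or (%rsi) into an XMM
; register, after which both run lines proceed exactly as in the
; register-register tests above.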
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -6880,11 +30390,52 @@ entry:
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -6895,11 +30446,53 @@ entry:
}
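; Widening v4i1 to v16i1 follows the same insertion scheme as the v8i1 tests,
; switched to 32-bit elements: vpternlogd materializes the all-ones lanes,
; vpermi2d merges them (index 16 selects element 0 of the second source), and
; vpslld $31 plus vptestmd convert back to a k-register. Bit 0 is handled
; separately with "andl $1" and the kxorw/kshiftrw/kshiftlw/korw sequence,
; which splices it into an otherwise cleared mask.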
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6911,12 +30504,69 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -6930,12 +30580,70 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -6951,12 +30659,54 @@ entry:
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -6969,13 +30719,71 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -6992,11 +30800,41 @@ entry:
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1080:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1081:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1082:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -7007,11 +30845,42 @@ entry:
}
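; NOTE: The v4i1-to-v32i1 tests in this group all compile the same IR shape:
; a signed-ge compare of the two <4 x i32> halves whose 4-bit mask is padded
; with zeros out to 32 bits. A functionally equivalent hand-written sketch
; (illustrative only, not part of the autogenerated checks; the real tests
; pad via shufflevector to <32 x i1> rather than zext):
;
; define zeroext i32 @sketch_vpcmpsged_v4i1_v32i1(<2 x i64> %__a, <2 x i64> %__b) {
; entry:
;   %a = bitcast <2 x i64> %__a to <4 x i32>
;   %b = bitcast <2 x i64> %__b to <4 x i32>
;   %cmp = icmp sge <4 x i32> %a, %b   ; vpcmpled with operands swapped
;   %bits = bitcast <4 x i1> %cmp to i4
;   %wide = zext i4 %bits to i32       ; zeros bits 4..31
;   ret i32 %wide
; }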
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1083:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1084:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1085:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -7023,12 +30892,58 @@ entry:
}
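; NOTE: In the "_mask_mem" forms the VLX path folds the load straight into
; vpcmpnltd (%rdi), but the NoVLX path must first materialize the operand
; with vmovdqa: vpcmpgtd can only take a memory operand as its second
; source, and here the loaded value has to be the first.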
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1086:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1087:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1088:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -7042,12 +30957,59 @@ entry:
}
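; NOTE: In the masked NoVLX expansions above, the i8 mask %__u arrives in
; %edi and is unpacked one bit at a time: kshiftlw/kshiftrw isolate each bit
; of %k0, kmovw moves it to a GPR, and vpinsrb rebuilds it as an xmm lane.
; vpandn then applies it to the compare: the register holds gt(%__b, %__a),
; so andn(gt, mask) = not(gt) & mask = sge(%__a, %__b) & mask.
;
; A functionally equivalent sketch of the masked IR pattern (illustrative
; names, not the exact autogenerated input):
;
; define zeroext i32 @sketch_masked_sge_v4i1_v32i1(i8 zeroext %__u, <4 x i32> %a, <4 x i32> %b) {
; entry:
;   %cmp = icmp sge <4 x i32> %a, %b
;   %m8  = bitcast i8 %__u to <8 x i1>
;   %m4  = shufflevector <8 x i1> %m8, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %and = and <4 x i1> %cmp, %m4
;   %bits = bitcast <4 x i1> %and to i4
;   %wide = zext i4 %bits to i32
;   ret i32 %wide
; }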
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1089:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1090:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1091:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -7063,12 +31025,43 @@ entry:
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1092:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1093:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1094:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -7081,13 +31074,60 @@ entry:
}
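; NOTE: The "_mem_b" variants compare against a broadcast scalar: both the
; VLX and NoVLX paths load a single i32 through vpbroadcastd and then reuse
; the same compare sequence, so only the operand setup differs from the
; register forms above.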
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1095:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1096:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1097:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -7104,11 +31144,48 @@ entry:
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1098:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1099:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1100:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -7119,11 +31196,49 @@ entry:
}
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1101:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1102:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1103:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -7135,12 +31250,65 @@ entry:
}
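; NOTE: For the i64-returning variants the NoVLX path assembles the result
; through memory: kmovw spills zeroed 16-bit mask words plus the two result
; words into the 32-byte-aligned stack slot, the halves are reloaded as
; i32s, and shlq $32 / orq splice them into the final i64 in %rax.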
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1104:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1105:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1106:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -7154,12 +31322,66 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1107:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1108:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1109:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -7175,12 +31397,50 @@ entry:
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %xmm1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1110:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1111:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1112:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -7193,13 +31453,67 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1113:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1114:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1115:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -7216,21 +31530,23 @@ entry:
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -7242,21 +31558,23 @@ entry:
}
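; NOTE: The v8i1-to-v16i1 cases need no stack round-trip on KNL: the ymm
; inputs are simply retagged as zmm (the "# kill" lines), vpcmpled runs on
; the widened registers, and the kshiftlw $8 / kshiftrw $8 pair clears the
; eight mask bits that have no corresponding element. The VLX checks now use
; kmovd plus a "# kill" annotation marking the implicit truncation of %eax
; to %ax for the zeroext i16 return.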
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -7269,23 +31587,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -7299,23 +31619,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -7331,22 +31653,24 @@ entry:
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -7360,24 +31684,26 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -7394,12 +31720,72 @@ entry:
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1116:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1117:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1118:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -7410,12 +31796,72 @@ entry:
}
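; NOTE: To pad v8i1 out to v32i1, KNL extracts all eight mask bits
; individually (kshift + kmovw + vpinsrb), re-expands them to a zmm mask via
; vpmovsxbd / vpslld $31 / vptestmd, and stores the 16-bit mask word next to
; a zeroed word so that one 32-bit reload from (%rsp) yields the
; zero-extended result.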
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1119:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1120:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1121:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -7427,13 +31873,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1122:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1123:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1124:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -7446,13 +31954,75 @@ entry:
}
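; NOTE: Unlike the v4i1 cases, the masked v8i1 expansions can apply %__u
; entirely in mask registers: kmovw %edi, %k1 followed by kandw intersects
; it with the compare mask before any bit is extracted, avoiding the vpandn
; vector detour used above.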
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1125:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1126:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1127:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -7467,13 +32037,73 @@ entry:
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1128:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1129:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1130:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -7486,14 +32116,76 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1131:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1132:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1133:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -7509,12 +32201,77 @@ entry:
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1134:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1135:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1136:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -7525,12 +32282,77 @@ entry:
}
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1137:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1138:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1139:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -7542,13 +32364,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1140:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1141:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1142:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -7561,13 +32450,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1143:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1144:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1145:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -7582,13 +32538,78 @@ entry:
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %ymm1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1146:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1147:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1148:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -7601,14 +32622,81 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %ymm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1149:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1150:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1151:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -7624,12 +32712,120 @@ entry:
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1152:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1153:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1154:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1155:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1156:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1157:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1158:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1159:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -7640,12 +32836,120 @@ entry:
}
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1160:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1161:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1162:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1163:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1164:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1165:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1166:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1167:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -7657,13 +32961,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1168:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1169:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1170:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1171:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1172:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1173:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1174:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1175:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -7676,13 +33089,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1176:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1177:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1178:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1179:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1180:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1181:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1182:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1183:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -7697,13 +33219,122 @@ entry:
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %zmm1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %zmm1
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1184:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1185:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1186:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1187:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1188:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1189:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1190:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1191:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -7716,14 +33347,124 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %zmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %zmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1192:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1193:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1194:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1195:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1196:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1197:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1198:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1199:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -7739,12 +33480,125 @@ entry:
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1200:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1201:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1202:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1203:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1204:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1205:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1206:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1207:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -7755,12 +33609,125 @@ entry:
}
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1208:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1209:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1210:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1211:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1212:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1213:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1214:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1215:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -7772,13 +33739,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1216:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1217:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1218:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1219:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1220:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1221:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1222:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1223:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -7791,13 +33872,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1224:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1225:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1226:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1227:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1228:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1229:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1230:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1231:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -7812,13 +34007,127 @@ entry:
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rdi), %zmm1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rdi), %zmm1
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1232:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1233:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1234:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1235:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1236:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1237:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1238:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1239:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -7831,14 +34140,129 @@ entry:
}
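; NOTE: in the _mask_mem_b variants the scalar memory operand is splatted
; with vpbroadcastd and the signed "greater or equal" predicate is emitted
; in commuted form, vpcmpled with the operands swapped (splat <= x), so
; both the VLX and NoVLX paths can reuse the same k-register compare.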
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd (%rsi), %zmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastd (%rsi), %zmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1240:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1241:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1242:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1243:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1244:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1245:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1246:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1247:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -7854,12 +34278,25 @@ entry:
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -7870,12 +34307,26 @@ entry:
}
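; NOTE: without VL there is no 128-bit compare-into-mask, and AVX2 has no
; signed "greater or equal" integer compare, so the NoVLX code computes
; a >= b as NOT(b > a): vpcmpgtq followed by vpxor against the all-ones
; vector that vpcmpeqd of a register with itself produces. vinsertps then
; packs the two i1 lanes into the low dwords (zeroing the rest) before the
; vpslld $31 + vptestmd round trip into a k-register.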
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -7887,13 +34338,34 @@ entry:
}
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -7907,13 +34379,35 @@ entry:
}
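; NOTE: in the masked v2i1 forms the incoming i8 mask is loaded into a
; k-register with kmovw, bits 0 and 1 are isolated with kshiftlw $15/$14
; plus kshiftrw $15 pairs, moved to GPRs, and rebuilt as vector lanes with
; vmovd/vpinsrb. Since the vector compare produced b > a rather than
; a >= b, a single vpandn folds the missing NOT together with the AND
; against the mask vector.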
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -7929,13 +34423,27 @@ entry:
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -7948,14 +34456,36 @@ entry:
}
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -7972,11 +34502,37 @@ entry:
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -7987,11 +34543,38 @@ entry:
}
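; NOTE: for the i8 return width each i1 lane is extracted with vpextrb,
; moved into a mask register, and materialized as an all-ones element by
; the zero-masked vpternlogq $255 (ternary truth table 0xFF). vpermi2q
; with the index vector [0,8,2,3,4,5,6,7] splices element 0 of the second
; source into element 1 of the first, and vpsllq $63 + vptestmq converts
; the merged vector back into a k-register.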
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8003,12 +34586,46 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -8022,12 +34639,47 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8043,12 +34695,39 @@ entry:
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8061,13 +34740,48 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8084,11 +34798,36 @@ entry:
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -8099,11 +34838,37 @@ entry:
}
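; NOTE: the i16 return width uses the dword analogue of the sequence
; above: vpternlogd and vpermi2d with index vector [0,16,2,...], followed
; by vpslld $31 + vptestmd. Bit 0 is clamped with andl $1 instead of a
; kshift pair, and the kxorw/kshiftrw/kshiftlw/korw sequence moves that
; single bit into %k1 for the zero-masked vpternlogd.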
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8115,12 +34880,45 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -8134,12 +34932,46 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8155,12 +34987,38 @@ entry:
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8173,13 +35031,47 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8196,11 +35088,41 @@ entry:
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1248:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1249:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1250:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -8211,11 +35133,42 @@ entry:
}
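; NOTE: for the i32 return width vpshufb moves the two result bytes into
; byte lanes 0 and 8 and zeroes everything else, so the upper xmm half
; extracted by vextracti128 is all zeroes and supplies the zero-extension.
; Each half is widened with vpmovsxbd, shifted into the sign bit, tested
; into a k-register, and spilled as a 16-bit mask word; a single movl from
; the aligned stack slot then reads the combined 32-bit result.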
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1251:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1252:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1253:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8227,12 +35180,50 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1254:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1255:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1256:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -8246,12 +35237,51 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1257:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1258:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1259:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8267,12 +35297,43 @@ entry:
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1260:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1261:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1262:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8285,13 +35346,52 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1263:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1264:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1265:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8308,11 +35408,48 @@ entry:
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1266:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1267:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1268:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -8323,11 +35460,49 @@ entry:
}
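; NOTE: the i64 return width extends the same scheme with two extra zero
; mask words spilled up front to cover bits 32..63; the low dword is read
; with movl (%rsp), the adjacent dword into %ecx, and shlq $32 + orq glue
; the two halves into the final %rax value.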
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1269:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1270:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1271:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8339,12 +35514,57 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1272:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1273:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1274:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -8358,12 +35578,58 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1275:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1276:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1277:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -8379,12 +35645,50 @@ entry:
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1278:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1279:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1280:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8397,13 +35701,59 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %xmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1281:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1282:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1283:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -8420,12 +35770,55 @@ entry:
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8436,12 +35829,56 @@ entry:
}
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8453,13 +35890,74 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8473,13 +35971,75 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8495,13 +36055,57 @@ entry:
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8514,14 +36118,76 @@ entry:
}
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8538,12 +36204,54 @@ entry:
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8554,12 +36262,55 @@ entry:
}
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8571,13 +36322,73 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8591,13 +36402,74 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8613,13 +36485,56 @@ entry:
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8632,14 +36547,75 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8656,12 +36632,43 @@ entry:
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1284:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1285:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1286:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8672,12 +36679,44 @@ entry:
}
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1287:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1288:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1289:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8689,13 +36728,62 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1290:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1291:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1292:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8709,13 +36797,63 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1293:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1294:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1295:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8731,13 +36869,45 @@ entry:
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1296:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1297:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1298:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8750,14 +36920,64 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1299:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1300:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1301:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8774,12 +36994,50 @@ entry:
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1302:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1303:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1304:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8790,12 +37048,51 @@ entry:
}
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1305:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1306:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1307:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8807,13 +37104,69 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1308:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1309:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1310:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -8827,13 +37180,70 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1311:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1312:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1313:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -8849,13 +37259,52 @@ entry:
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %ymm1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1314:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1315:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1316:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8868,14 +37317,71 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %ymm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1317:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1318:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1319:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -8892,12 +37398,20 @@ entry:
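+; Note: for v8i1 widened to i16, both targets read the k-register directly and
+; only the kmov width differs (kmovd on SKX, kmovw on KNL).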
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -8908,12 +37422,20 @@ entry:
}
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -8925,13 +37447,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -8944,13 +37475,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -8965,13 +37505,22 @@ entry:
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -8984,14 +37533,24 @@ entry:
}
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -9007,12 +37566,70 @@ entry:
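+; Note: widening v8i1 to i32 without VLX round-trips memory: a zeroed mask word
+; covers the upper bits while each low bit is rebuilt with kshift/kmovw/vpinsrb.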
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1320:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1321:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1322:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -9023,12 +37640,70 @@ entry:
}
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1323:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1324:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1325:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -9040,13 +37715,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1326:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1327:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1328:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -9059,13 +37793,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1329:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1330:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1331:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -9080,13 +37873,72 @@ entry:
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1332:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1333:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1334:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -9099,14 +37951,74 @@ entry:
}
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1335:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1336:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1337:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -9122,12 +38034,75 @@ entry:
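+; Note: the i64 widenings below assemble the result from two 32-bit stack words
+; (movl, shlq $32, orq) because the NoVLX target has no kmovq.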
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1338:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1339:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1340:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -9138,12 +38113,75 @@ entry:
}
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1341:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1342:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1343:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -9155,13 +38193,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1344:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1345:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1346:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -9174,13 +38276,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1347:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1348:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1349:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -9195,13 +38361,77 @@ entry:
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %zmm1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1350:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1351:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1352:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -9214,14 +38444,79 @@ entry:
}
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rsi), %zmm1
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1353:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1354:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1355:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -9237,11 +38532,125 @@ entry:
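+; The vpcmpult* tests below exercise unsigned compares. KNL has no unsigned
+; byte vector compare on xmm/ymm, so the NoVLX path biases both operands by
+; the sign bit (vpxor with 0x80 in every byte) and uses the signed vpcmpgtb
+; with the operands swapped, since x <u y iff (x ^ 0x80) <s (y ^ 0x80).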
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1356:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1357:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1358:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1359:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1360:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1361:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1362:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1363:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
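+; The kshiftlw/kshiftrw pairs above isolate one mask bit at a time; vpinsrb
+; then rebuilds the 16 bits as a byte vector so the result can be retested
+; into a k-register and spilled as a single 32-bit value.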
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -9252,11 +38661,125 @@ entry:
}
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltub (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1364:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1365:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1366:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1367:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1368:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1369:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1370:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1371:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9268,12 +38791,127 @@ entry:
}
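+; In the masked variants the i16 mask argument arrives in %edi, is moved
+; into %k1 with kmovw, and is applied as a write-mask directly on vptestmd,
+; so only the requested lanes of the compare survive.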
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1372:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1373:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1374:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1375:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1376:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1377:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1378:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1379:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -9286,12 +38924,127 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1380:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1381:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1382:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1383:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1384:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1385:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1386:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1387:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9306,11 +39059,130 @@ entry:
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1388:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1389:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1390:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1391:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1392:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1393:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1394:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1395:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -9321,11 +39193,130 @@ entry:
}
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltub (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1396:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1397:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1398:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1399:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1400:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1401:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1402:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1403:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9337,12 +39328,132 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1404:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1405:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1406:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1407:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1408:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1409:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1410:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1411:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
@@ -9355,12 +39466,132 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1412:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1413:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1414:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1415:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1416:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1417:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1418:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1419:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9375,12 +39606,49 @@ entry:
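+; For 32-lane byte compares the NoVLX path splits the ymm compare result
+; with vextracti128, sign-extends each xmm half to 16 dwords, and stores the
+; two resulting 16-bit mask words to adjacent stack slots to form the
+; 32-bit mask.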
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1420:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1421:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1422:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -9391,12 +39659,49 @@ entry:
}
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltub (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1423:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1424:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1425:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -9408,13 +39713,59 @@ entry:
}
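+; KNL has no kmovd for mask registers, so the i32 mask argument is spilled
+; to the stack and reloaded as two kmovw halves; vpternlogd $255 with
+; {%k1} {z} materializes each half as all-ones dwords, vpmovdb narrows that
+; to bytes, and vpand applies it to the compare result before the final
+; vptestmd.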
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1426:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1427:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1428:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
@@ -9427,13 +39778,59 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1429:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1430:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1431:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm4
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm4, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -9448,11 +39845,27 @@ entry:
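+; Word variants use the same sign-bit trick with a 0x8000 bias and
+; vpcmpgtw. The 8-bit result is widened to 16 mask bits with
+; kxorw/kunpckbw, which concatenates a zeroed byte above the result byte.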
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -9463,11 +39876,27 @@ entry:
}
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9479,12 +39908,29 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -9497,12 +39943,29 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9517,11 +39980,75 @@ entry:
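+; Widening the 8-bit word-compare result to i32 again goes through the
+; stack, since there is no 32-bit k-register move available on KNL.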
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1432:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1433:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1434:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -9532,11 +40059,75 @@ entry:
}
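; Widening v8i1 to i32 without VLX goes through the stack: a zeroed zmm is
; tested into %k1 (an all-zero mask) and spilled to the upper half of an
; aligned 4-byte slot, the eight compare bits are pulled out of %k0 one at a
; time (kshiftlw/kshiftrw pairs), rebuilt into a byte vector with vpinsrb,
; re-tested into a mask, spilled to the lower half, and the final i32 is a
; single movl from (%rsp). With VLX the same widening is just the kmovd above.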
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1435:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1436:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1437:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9548,12 +40139,77 @@ entry:
}
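; In the masked variants the scalar %__u becomes a write-mask: VLX folds it
; straight into the compare ({%k1}), while NoVLX applies it to the vptestmq
; that re-materializes the compare result, which implements the and of the
; two i1 vectors in the IR. A hedged C equivalent using the AVX-512VL/BW
; intrinsic (the intrinsic spelling is assumed, not taken from this file):
;
;   #include <immintrin.h>
;   /* Compare under write-mask u; lanes whose u bit is clear yield 0. */
;   static inline __mmask8 masked_cmplt_epu16(__mmask8 u, __m128i a,
;                                             __m128i b) {
;     return _mm_mask_cmplt_epu16_mask(u, a, b);
;   }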
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1438:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1439:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1440:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -9566,12 +40222,77 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1441:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1442:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1443:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9586,11 +40307,80 @@ entry:
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1444:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1445:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1446:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -9601,11 +40391,80 @@ entry:
}
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1447:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1448:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1449:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9617,12 +40476,82 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1450:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1451:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1452:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
@@ -9635,12 +40564,82 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1453:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1454:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1455:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
+; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9655,12 +40654,126 @@ entry:
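; The 256-bit (v16i16) compares below need all sixteen mask bits, so the
; NoVLX path extracts each bit with a kshiftlw/kshiftrw pair into its own
; GPR and rebuilds the vector with vpinsrb; keeping sixteen values live is
; what forces %rbx and %r12-%r15 to be saved. A hedged sketch of the
; operation under test, assuming the 256-bit intrinsic name (not taken from
; this file):
;
;   #include <immintrin.h>
;   static inline unsigned cmplt_epu16_256(__m256i a, __m256i b) {
;     return (unsigned)_mm256_cmplt_epu16_mask(a, b); /* 16 result bits */
;   }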
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1456:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1457:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1458:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1459:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1460:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1461:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1462:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1463:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -9671,12 +40784,126 @@ entry:
}
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1464:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1465:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1466:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1467:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1468:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1469:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1470:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1471:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -9688,13 +40915,128 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1472:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1473:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1474:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1475:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1476:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1477:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1478:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1479:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -9707,13 +41049,128 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1480:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1481:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1482:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1483:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1484:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1485:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1486:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1487:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -9728,12 +41185,131 @@ entry:
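; The v16i1-to-v64i1 tests combine both schemes: the sixteen extracted bits
; fill the low word, three zeroed words pad bits 16-63, and the halves are
; merged with shlq $32 + orq exactly as in the v8i1-to-v64i1 cases above.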
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1488:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1489:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1490:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1491:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1492:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1493:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1494:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1495:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -9744,12 +41320,131 @@ entry:
}
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1496:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1497:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1498:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1499:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1500:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1501:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1502:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1503:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -9761,13 +41456,133 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1504:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1505:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1506:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1507:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1508:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1509:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1510:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1511:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
@@ -9780,13 +41595,133 @@ entry:
}
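; The NoVLX lowerings in this section emulate unsigned word compares (which
; need AVX512BW/VL for a native vpcmpltuw) by flipping the sign bit of every
; 16-bit lane and doing a signed compare with the operands swapped:
; a <u b holds exactly when (b ^ 0x8000) >s (a ^ 0x8000), which is what the
; vpxor-with-32768-splat followed by vpcmpgtw sequences above compute.
; A minimal standalone IR sketch of that identity (the function name is
; invented for illustration and is not part of the generated tests):
;
; define <4 x i1> @ult_as_biased_sgt(<4 x i16> %a, <4 x i16> %b) {
; entry:
;   ; xor with 0x8000 maps unsigned order onto signed order, lane-wise
;   %ba = xor <4 x i16> %a, <i16 -32768, i16 -32768, i16 -32768, i16 -32768>
;   %bb = xor <4 x i16> %b, <i16 -32768, i16 -32768, i16 -32768, i16 -32768>
;   %c = icmp sgt <4 x i16> %bb, %ba
;   ret <4 x i1> %c
; }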
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1512:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1513:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1514:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1515:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1516:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1517:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1518:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1519:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -9801,12 +41736,353 @@ entry:
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1520:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1521:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1522:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -9817,12 +42093,268 @@ entry:
}
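; In the v32i1 case above, KNL has no 512-bit word compare, so the zmm
; inputs are split: each 128-bit quarter is moved through GPRs
; (vmovq/vpextrq) and rebuilt 16 bits at a time with shifts and vpinsrw,
; the quarters are rejoined with vinserti128, and each ymm half is compared
; with the same sign-bit-biased vpcmpgtw trick; the two 16-bit mask words
; are spilled to the stack and recombined into the 64-bit result with
; shlq $32/orq.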
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1523:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1524:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1525:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -9834,13 +42366,363 @@ entry:
}
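; The *_mask_mem variant above folds the sign-bit bias into the memory
; operand: vpxor (%rdi), %ymm1, ... and vpxor 32(%rdi), %ymm1, ... bias the
; low and high 256-bit halves of the in-memory vector directly, so only the
; register operand needs a separate xor before each vpcmpgtw.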
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1526:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1527:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1528:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm0
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm5
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm7
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm6 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm6, %ymm4, %ymm3
+; NoVLX-NEXT: vpxor %ymm6, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm4
+; NoVLX-NEXT: vpxor %ymm6, %ymm8, %ymm2
+; NoVLX-NEXT: vpxor %ymm6, %ymm5, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpand %xmm7, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm0, %xmm4, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
@@ -9853,13 +42735,278 @@ entry:
}
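; In the masked v32i1 form above, the i32 mask %__u is spilled to the stack
; with movl %edi, reloaded 16 bits at a time into %k1/%k2, expanded to byte
; vectors via vpternlogd $255 {%k}{z} (all-ones under the mask) plus
; vpmovdb, and applied to each half's compare result with vpand before the
; final vptestmd regenerates the k-register words that are combined.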
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1529:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1530:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1531:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $96, %rsp
+; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm4
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm5
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm6
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm7
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm7, %xmm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm3, %ymm3
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor (%rsi), %ymm5, %ymm6
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm6, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; NoVLX-NEXT: vpxor 32(%rsi), %ymm5, %ymm5
+; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4
+; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4
+; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4
+; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %ecx
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: shlq $32, %rax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -9874,11 +43021,54 @@ entry:
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -9889,11 +43079,54 @@ entry:
}
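; The dword variants use the analogous bias of 0x80000000 (the
; vpbroadcastd 2147483648 splat) so vpcmpgtd implements the unsigned
; compare. Without VLX there is no compact v4i1 -> v8i1 zero extension, so
; each compare bit is pulled out with vpextrb, turned into an all-ones zmm
; lane via kmovw/vpternlogq $255 {%k1}{z}, and merged into place through a
; vpermi2q index vector before vpsllq/vptestmq rebuilds the k-register; the
; trailing "# kill" line is llc's annotation that %al is returned as the
; live sub-register of %eax.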
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9905,12 +43138,73 @@ entry:
}
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -9924,12 +43218,73 @@ entry:
}
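
In the masked variants, the i8 mask %__u arrives in %edi, is moved into a k-register, and each of its low four bits is isolated by a kshiftlw/kshiftrw pair before being reassembled into a vector that is ANDed with the compare result. A sketch of what one shift pair computes on a 16-bit mask word (the helper name is hypothetical):

    #include <stdint.h>

    /* kshiftlw $(15 - i) followed by kshiftrw $15 leaves just bit i:
       shift bit i up into bit 15, then shift it back down to bit 0 */
    static uint16_t kmask_bit(uint16_t k, unsigned i) {
        return (uint16_t)((uint16_t)(k << (15 - i)) >> 15);
    }
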
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -9945,11 +43300,55 @@ entry:
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -9962,12 +43361,74 @@ entry:
}
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -9984,11 +43445,53 @@ entry:
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -9999,11 +43502,53 @@ entry:
}
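
The vpternlogd $255 ... {%k1} {z} / vpermi2d pairs in these v16i1 widenings emulate insertelement on a mask vector: ternary-logic immediate 255 writes all-ones into the lanes enabled by k1 and zeros the rest, and the index vector fed to vpermi2d (identity except for one 16, which selects from the second source) splices that lane into the accumulated result. A sketch of the zero-masked all-ones step, with the lane loop written out in plain C:

    #include <stdint.h>

    /* vpternlogd $255 with {%k1} {z}: every lane selected by the mask
       becomes all-ones, every other lane becomes zero */
    static void ternlog_all_ones_z(uint32_t dst[16], uint16_t k1) {
        for (int i = 0; i < 16; i++)
            dst[i] = ((k1 >> i) & 1) ? 0xFFFFFFFFu : 0u;
    }
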
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -10015,12 +43560,72 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -10034,12 +43639,72 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -10055,11 +43720,54 @@ entry:
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -10072,12 +43780,73 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -10094,11 +43863,42 @@ entry:
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1532:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1533:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1534:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -10109,11 +43909,42 @@ entry:
}
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1535:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1536:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1537:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -10125,12 +43956,61 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1538:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1539:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1540:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -10144,12 +44024,61 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1541:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1542:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1543:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -10165,11 +44094,43 @@ entry:
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1544:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1545:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1546:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -10182,12 +44143,62 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1547:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1548:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1549:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -10204,11 +44215,49 @@ entry:
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1550:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1551:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1552:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -10219,11 +44268,49 @@ entry:
}
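
For the v64i1 results, the NoVLX code spills the 16-bit mask words (the compare word plus zeroed words from the vptestmd of a zero vector) into a 32-byte-aligned stack slot, reloads them as two 32-bit halves, and glues the halves together with shlq $32 and orq. The reassembly step, sketched in C (names are illustrative):

    #include <stdint.h>

    /* movl lo(%rsp), %eax; movl hi(%rsp), %ecx; shlq $32, %rcx;
       orq %rcx, %rax -- i.e. result = hi:lo */
    static uint64_t glue_mask_halves(uint32_t lo, uint32_t hi) {
        return ((uint64_t)hi << 32) | (uint64_t)lo;
    }
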
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1553:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1554:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1555:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -10235,12 +44322,68 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1556:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1557:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1558:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
@@ -10254,12 +44397,68 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1559:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1560:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1561:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -10275,11 +44474,50 @@ entry:
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1562:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1563:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1564:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -10292,12 +44530,69 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1565:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1566:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1567:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
@@ -10314,21 +44609,23 @@ entry:
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -10340,21 +44637,23 @@ entry:
}
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -10367,23 +44666,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -10397,23 +44698,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -10429,21 +44732,23 @@ entry:
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -10457,23 +44762,25 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -10490,12 +44797,72 @@ entry:
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1568:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1569:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1570:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -10506,12 +44873,72 @@ entry:
}
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1571:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1572:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1573:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -10523,13 +44950,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1574:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1575:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1576:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -10542,13 +45031,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1577:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1578:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1579:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -10563,12 +45114,72 @@ entry:
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1580:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1581:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1582:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -10581,13 +45192,75 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1583:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1584:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1585:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -10603,12 +45276,77 @@ entry:
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1586:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1587:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1588:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -10619,12 +45357,77 @@ entry:
}
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1589:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1590:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1591:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -10636,13 +45439,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1592:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1593:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1594:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -10655,13 +45525,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1595:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1596:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1597:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k1, %k0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -10676,12 +45613,77 @@ entry:
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1598:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1599:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1600:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -10694,13 +45696,80 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1601:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1602:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1603:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: kandw %k0, %k1, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
@@ -10716,12 +45785,120 @@ entry:
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1604:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1605:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1606:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1607:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1608:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1609:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1610:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1611:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -10732,12 +45909,120 @@ entry:
}
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1612:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1613:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1614:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1615:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1616:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1617:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1618:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1619:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -10749,13 +46034,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1620:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1621:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1622:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1623:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1624:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1625:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1626:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1627:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -10768,13 +46162,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1628:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1629:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1630:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1631:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1632:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1633:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1634:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1635:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -10789,12 +46292,120 @@ entry:
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1636:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1637:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1638:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1639:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1640:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1641:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1642:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1643:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -10807,13 +46418,122 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1644:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1645:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1646:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1647:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1648:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1649:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1650:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1651:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -10829,12 +46549,125 @@ entry:
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1652:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1653:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1654:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1655:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1656:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1657:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1658:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1659:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -10845,12 +46678,125 @@ entry:
}
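; NOTE: In the i64 (v64i1) variants the return value is assembled on the
; stack: the always-zero mask in %k1 (vptestmd of a zeroed register) is
; stored three times to clear bits 16-63, the 16 compare bits are stored at
; (%rsp), and the two 32-bit halves are recombined:
;
;   movl {{[0-9]+}}(%rsp), %ecx     ; high 32 bits (all zero here)
;   shlq $32, %rcx
;   movl (%rsp), %eax               ; low 32 bits, holding the compare mask
;   orq  %rcx, %rax                 ; zero-extended i64 result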
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1660:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1661:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1662:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1663:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1664:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1665:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1666:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1667:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -10862,13 +46808,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1668:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1669:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1670:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1671:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1672:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1673:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1674:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1675:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
@@ -10881,13 +46941,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1676:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1677:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1678:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1679:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1680:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1681:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1682:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1683:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -10902,12 +47076,125 @@ entry:
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1684:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1685:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1686:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1687:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1688:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1689:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1690:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1691:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -10920,13 +47207,127 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1692:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1693:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1694:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1695:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1696:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1697:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1698:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1699:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
@@ -10942,12 +47343,26 @@ entry:
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -10958,12 +47373,26 @@ entry:
}
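; NOTE: The NoVLX target has no xmm-width vpcmpltuq, so these vpcmpultq
; tests emulate the unsigned compare with the usual sign-bit trick:
; a <u b iff (a ^ 0x8000000000000000) <s (b ^ 0x8000000000000000). Hence the
; vpxor of both operands with the splatted sign bit (the 9223372036854775808
; constant) followed by a signed vpcmpgtq. vinsertps then packs the two
; qword results into dword lanes 0 and 1 (selecting dwords 0 and 2, zeroing
; the rest) before vpslld $31 + vptestmd rebuild the mask.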
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -10975,13 +47404,37 @@ entry:
}
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -10995,13 +47448,37 @@ entry:
}
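; NOTE: In the masked forms the scalar mask %__u is applied in the vector
; domain: bits 0 and 1 are extracted from %k0 with kshiftlw/kshiftrw pairs,
; placed in bytes 0 and 8 of an xmm register (the low bytes of the two qword
; lanes) via vmovd + vpinsrb $8, and ANDed with the compare result before
; the mask is rebuilt.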
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11017,12 +47494,27 @@ entry:
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11035,13 +47527,38 @@ entry:
}
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11058,11 +47575,38 @@ entry:
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11073,11 +47617,38 @@ entry:
}
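; NOTE: For the v8i1 widenings in this group the two compare bits are
; re-inserted into a zeroed 8-lane vector one at a time: vpextrb pulls each
; bit out of the xmm compare result, vpternlogq $255 with a {%k1} {z}
; writemask materializes an all-ones qword in the selected lane, and
; vpermi2q with the index vector [0,8,2,3,4,5,6,7] splices lane 0 of the
; second table into lane 1 of the result. vpsllq $63 + vptestmq then turn
; the spliced vector back into a k-register.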
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11089,12 +47660,49 @@ entry:
}

define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11108,12 +47716,49 @@ entry:
}

define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11129,11 +47774,39 @@ entry:
}

define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11146,12 +47819,50 @@ entry:
}

define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11168,11 +47879,37 @@ entry:
}

define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11183,11 +47920,37 @@ entry:
}

define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11199,12 +47962,48 @@ entry:
}

define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11218,12 +48017,48 @@ entry:
}

define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11239,11 +48074,38 @@ entry:
}

define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11256,12 +48118,49 @@ entry:
}

define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11278,11 +48177,42 @@ entry:
}

define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1700:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1701:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1702:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11293,11 +48223,42 @@ entry:
}

define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1703:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1704:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1705:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11309,12 +48270,53 @@ entry:
}

define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1706:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1707:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1708:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11328,12 +48330,53 @@ entry:
}

define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1709:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1710:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1711:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11349,11 +48392,43 @@ entry:
}

define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1712:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1713:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1714:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11366,12 +48441,54 @@ entry:
}

define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1715:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1716:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1717:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11388,11 +48505,49 @@ entry:
}

define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1718:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1719:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1720:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11403,11 +48558,49 @@ entry:
}

define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1721:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1722:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1723:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11419,12 +48612,60 @@ entry:
}

define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1724:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1725:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1726:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
@@ -11438,12 +48679,60 @@ entry:
}

define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1727:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1728:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1729:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -11459,11 +48748,50 @@ entry:
}

define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1730:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1731:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1732:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11476,12 +48804,61 @@ entry:
}

define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1733:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1734:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1735:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
@@ -11498,12 +48875,56 @@ entry:
}

define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11514,12 +48935,56 @@ entry:
}

define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11531,13 +48996,75 @@ entry:
}

define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11551,13 +49078,75 @@ entry:
}

define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11573,12 +49162,57 @@ entry:
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11591,13 +49225,76 @@ entry:
}
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11614,12 +49311,55 @@ entry:
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11630,12 +49370,55 @@ entry:
}
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11647,13 +49430,74 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11667,13 +49511,74 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11689,12 +49594,56 @@ entry:
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11707,13 +49656,75 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11730,12 +49741,44 @@ entry:
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1736:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1737:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1738:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11746,12 +49789,44 @@ entry:
}
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1739:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1740:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1741:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11763,13 +49838,63 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1742:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1743:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1744:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11783,13 +49908,63 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1745:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1746:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1747:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11805,12 +49980,45 @@ entry:
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1748:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1749:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1750:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11823,13 +50031,64 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1751:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1752:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1753:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11846,12 +50105,51 @@ entry:
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1754:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1755:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1756:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11862,12 +50160,51 @@ entry:
}
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1757:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1758:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1759:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11879,13 +50216,70 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1760:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1761:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1762:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
@@ -11899,13 +50293,70 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1763:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1764:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1765:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -11921,12 +50372,52 @@ entry:
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1766:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1767:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1768:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11939,13 +50430,71 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1769:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1770:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1771:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
+; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k0
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftlw $13, %k0, %k2
+; NoVLX-NEXT: kshiftrw $15, %k2, %k2
+; NoVLX-NEXT: kshiftlw $15, %k0, %k3
+; NoVLX-NEXT: kshiftrw $15, %k3, %k3
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k2, %eax
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
@@ -11962,12 +50511,20 @@ entry:
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -11978,12 +50535,20 @@ entry:
}
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -11995,13 +50560,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -12014,13 +50588,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -12035,12 +50618,20 @@ entry:
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -12053,13 +50644,22 @@ entry:
}
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -12075,12 +50675,70 @@ entry:
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1772:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1773:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1774:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -12091,12 +50749,70 @@ entry:
}
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1775:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1776:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1777:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -12108,13 +50824,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1778:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1779:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1780:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -12127,13 +50902,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1781:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1782:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1783:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -12148,12 +50982,70 @@ entry:
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1784:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1785:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1786:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -12166,13 +51058,72 @@ entry:
}
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1787:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1788:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1789:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -12188,12 +51139,75 @@ entry:
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1790:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1791:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1792:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -12204,12 +51218,75 @@ entry:
}
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1793:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1794:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1795:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -12221,13 +51298,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1796:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1797:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1798:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
@@ -12240,13 +51381,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1799:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1800:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1801:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -12261,12 +51466,75 @@ entry:
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1802:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1803:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1804:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -12279,13 +51547,77 @@ entry:
}
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1805:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1806:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1807:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
@@ -12302,11 +51634,51 @@ entry:
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
@@ -12317,11 +51689,51 @@ entry:
}
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -12333,11 +51745,52 @@ entry:
}
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
@@ -12351,11 +51804,50 @@ entry:
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
@@ -12366,11 +51858,50 @@ entry:
}
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -12382,11 +51913,51 @@ entry:
}
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
@@ -12400,11 +51971,39 @@ entry:
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1808:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1809:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1810:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
@@ -12415,11 +52014,39 @@ entry:
}
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1811:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1812:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1813:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -12431,11 +52058,40 @@ entry:
}
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1814:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1815:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1816:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
@@ -12449,11 +52105,46 @@ entry:
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1817:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1818:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1819:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
@@ -12464,11 +52155,46 @@ entry:
}
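; NOTE: For the i64-widening variants the frame grows to 64 bytes and zeroed
; mask words are stored for the padding (vpxord + vptestmd + kmovw); the two
; 32-bit halves are then reassembled with shlq $32 / orq.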
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1820:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1821:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1822:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -12480,11 +52206,47 @@ entry:
}
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1823:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1824:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1825:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
+; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
@@ -12498,21 +52260,23 @@ entry:
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
@@ -12524,21 +52288,23 @@ entry:
}
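; NOTE: With eight lanes the NoVLX path can stay in mask registers: the ymm
; inputs are implicitly widened to zmm (the "kill" annotations), the compare
; writes k0, and the kshiftlw/kshiftrw $8 pair clears the upper eight bits.
; The added "kill: %AX" note records that only the low 16 bits of %eax are
; defined for the i16 return.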
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
@@ -12551,21 +52317,23 @@ entry:
}
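; NOTE: The {1to8} embedded broadcast folds directly into the VLX compare,
; while the NoVLX path has to materialize the scalar with vbroadcastss before
; the full-width zmm compare.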
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
-; NoVLX: ## BB#0: ## %entry
-; NoVLX-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
@@ -12580,12 +52348,72 @@ entry:
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1826:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1827:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1828:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
@@ -12596,12 +52424,72 @@ entry:
}
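; NOTE: Widening v8i1 past 16 bits is where the NoVLX expansion balloons: each
; mask bit is isolated with a kshiftlw/kshiftrw pair, moved to a GPR, and
; reinserted with vpinsrb to build a byte vector, which is re-tested into a
; mask and spilled so the result can be loaded back as an i32.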
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1829:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1830:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1831:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovaps (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -12613,12 +52501,72 @@ entry:
}
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1832:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1833:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1834:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
@@ -12632,12 +52580,77 @@ entry:
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1835:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1836:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1837:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
@@ -12648,12 +52661,77 @@ entry:
}
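; NOTE: The v8i1-to-i64 variant repeats the bit-by-bit rebuild, stores three
; zero words to pad the 64-bit slot, and merges the two halves with
; shlq $32 / orq as before.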
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1838:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1839:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1840:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vmovaps (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -12665,12 +52743,77 @@ entry:
}
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1841:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1842:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1843:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
@@ -12684,12 +52827,120 @@ entry:
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1844:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1845:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1846:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1847:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1848:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1849:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1850:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1851:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
@@ -12700,12 +52951,120 @@ entry:
}
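; NOTE: Rebuilding all sixteen bits needs more scratch registers than the ABI
; leaves free, so this expansion also pushes rbx and r12-r15 (with matching
; .cfi_offset entries) and restores the stack with leaq -40(%rbp), %rsp.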
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1852:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1853:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1854:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1855:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1856:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1857:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1858:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1859:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -12717,12 +53076,120 @@ entry:
}
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1860:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1861:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1862:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1863:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1864:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1865:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1866:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1867:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
@@ -12736,12 +53203,18 @@ entry:
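; NOTE: The {sae} tests below appear to go through the avx512.mask.cmp
; intrinsic (its pd counterpart is declared further down), which already
; returns a 16-bit mask, so no widening is needed: both prefixes emit the same
; compare and kmovw, and the KNL path merely omits the trailing vzeroupper.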
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovw %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
@@ -12752,12 +53225,125 @@ entry:
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1868:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1869:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1870:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1871:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1872:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1873:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1874:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1875:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
@@ -12768,12 +53354,125 @@ entry:
}
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1876:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1877:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1878:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1879:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1880:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1881:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1882:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1883:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -12785,12 +53484,125 @@ entry:
}
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1884:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1885:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1886:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: .Lcfi1887:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1888:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1889:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1890:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1891:
+; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r11d
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r14d
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r15d
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r12d
+; NoVLX-NEXT: kshiftlw $8, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r13d
+; NoVLX-NEXT: kshiftlw $7, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $6, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ebx
+; NoVLX-NEXT: kshiftlw $5, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $4, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $2, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
+; NoVLX-NEXT: kshiftlw $1, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
@@ -12804,13 +53616,20 @@ entry:
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: movzwl %ax, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: movzwl %ax, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
@@ -12822,12 +53641,23 @@ entry:
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)
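; NOTE: The operands are presumably (a, b, comparison predicate, write-mask,
; sae/rounding control), matching how the {sae} tests drive this intrinsic.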
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
@@ -12838,12 +53668,23 @@ entry:
}
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -12855,12 +53696,24 @@ entry:
}
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
+; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
@@ -12874,11 +53727,35 @@ entry:
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
@@ -12889,11 +53766,35 @@ entry:
}
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -12905,11 +53806,36 @@ entry:
}
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
@@ -12923,11 +53849,34 @@ entry:
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
@@ -12938,11 +53887,34 @@ entry:
}
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -12954,11 +53926,35 @@ entry:
}
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
@@ -12972,11 +53968,39 @@ entry:
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1892:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1893:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1894:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
@@ -12987,11 +54011,39 @@ entry:
}
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1895:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1896:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1897:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -13003,11 +54055,40 @@ entry:
}
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1898:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1899:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1900:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
@@ -13021,11 +54102,46 @@ entry:
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1901:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1902:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1903:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
@@ -13036,11 +54152,46 @@ entry:
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1904:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1905:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1906:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
@@ -13052,11 +54203,47 @@ entry:
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1907:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1908:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1909:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
@@ -13070,12 +54257,53 @@ entry:
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
@@ -13086,12 +54314,53 @@ entry:
}
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -13103,12 +54372,54 @@ entry:
}
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kshiftlw $7, %k0, %k0
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
+; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
@@ -13122,12 +54433,52 @@ entry:
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
@@ -13138,12 +54489,52 @@ entry:
}
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -13155,12 +54546,53 @@ entry:
}
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
+; NoVLX-NEXT: andl $1, %eax
+; NoVLX-NEXT: kmovw %eax, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k1
+; NoVLX-NEXT: kshiftlw $1, %k1, %k1
+; NoVLX-NEXT: korw %k0, %k1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
+; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
@@ -13174,12 +54606,41 @@ entry:
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1910:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1911:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1912:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
@@ -13190,12 +54651,41 @@ entry:
}
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1913:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1914:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1915:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -13207,12 +54697,42 @@ entry:
}
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1916:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1917:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1918:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
@@ -13226,12 +54746,48 @@ entry:
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1919:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1920:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1921:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
@@ -13242,12 +54798,48 @@ entry:
}
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1922:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1923:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1924:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
@@ -13259,12 +54851,49 @@ entry:
}
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1925:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1926:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1927:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
+; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
+; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
@@ -13278,12 +54907,20 @@ entry:
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
@@ -13294,12 +54931,20 @@ entry:
}
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -13311,12 +54956,20 @@ entry:
}
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
@@ -13330,12 +54983,22 @@ entry:
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: movzbl %al, %eax
+; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzbl %al, %eax
+; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
@@ -13346,12 +55009,70 @@ entry:
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1928:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1929:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1930:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
@@ -13362,12 +55083,70 @@ entry:
}
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1931:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1932:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1933:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -13379,12 +55158,70 @@ entry:
}
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1934:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1935:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1936:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
@@ -13398,12 +55235,19 @@ entry:
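+; The {sae} variants need no stack round-trip: the mask is copied straight to
+; a GPR and zero-extended with movzbl. NoVLX uses kmovw rather than kmovb
+; since kmovb requires AVX512DQ, which KNL lacks, and no vzeroupper is emitted
+; on KNL.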
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovb %k0, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovb %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzbl %al, %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
@@ -13414,12 +55258,75 @@ entry:
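+; For the i64-returning variants below, the NoVLX frame grows to 64 bytes and
+; three zeroed mask words are spilled to clear bits 16-63; the result is then
+; assembled from two 32-bit halves with shlq $32/orq.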
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1937:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1938:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1939:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
@@ -13430,12 +55337,75 @@ entry:
}
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1940:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1941:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1942:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
@@ -13447,12 +55417,75 @@ entry:
}
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .Lcfi1943:
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .Lcfi1944:
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .Lcfi1945:
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kshiftlw $14, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r9d
+; NoVLX-NEXT: kshiftlw $13, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edx
+; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %esi
+; NoVLX-NEXT: kshiftlw $11, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %edi
+; NoVLX-NEXT: kshiftlw $10, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kshiftlw $9, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
@@ -13466,13 +55499,20 @@ entry:
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; CHECK-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
+; VLX: # BB#0: # %entry
+; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: movzbl %al, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
+; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzbl %al, %eax
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>