diff options
Diffstat (limited to 'test/CodeGen/X86/masked_gather_scatter.ll')
| -rw-r--r-- | test/CodeGen/X86/masked_gather_scatter.ll | 2256 |
1 files changed, 0 insertions, 2256 deletions
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll deleted file mode 100644 index 77254ba6760f..000000000000 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ /dev/null @@ -1,2256 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64 -; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX -; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32 -; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR -; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - - -; SCALAR-LABEL: test1 -; SCALAR: extractelement <16 x float*> -; SCALAR-NEXT: load float -; SCALAR-NEXT: insertelement <16 x float> -; SCALAR-NEXT: extractelement <16 x float*> -; SCALAR-NEXT: load float - -define <16 x float> @test1(float* %base, <16 x i32> %ind) { -; KNL_64-LABEL: test1: -; KNL_64: # BB#0: -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; KNL_64-NEXT: vmovaps %zmm1, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test1: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; KNL_32-NEXT: vmovaps %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test1: -; SKX: # BB#0: -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test1: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 - %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer - - %sext_ind = sext <16 x i32> %ind to <16 x i64> - %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind - - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) - ret <16 x float>%res -} - -declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>) -declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>) -declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> ) - - -; SCALAR-LABEL: test2 -; SCALAR: extractelement <16 x float*> -; SCALAR-NEXT: load float -; SCALAR-NEXT: insertelement <16 x float> -; SCALAR-NEXT: br label %else -; SCALAR: else: -; SCALAR-NEXT: %res.phi.else = phi -; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1 -; SCALAR-NEXT: %ToLoad1 = icmp eq i1 %Mask1, true -; SCALAR-NEXT: br i1 %ToLoad1, label %cond.load1, label %else2 - -define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) { -; KNL_64-LABEL: test2: -; KNL_64: # BB#0: -; KNL_64-NEXT: kmovw %esi, %k1 -; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; KNL_64-NEXT: vmovaps %zmm1, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test2: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; KNL_32-NEXT: vmovaps %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test2: -; SKX: # BB#0: -; SKX-NEXT: kmovw %esi, %k1 -; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test2: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 - %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer - - %sext_ind = sext <16 x i32> %ind to <16 x i64> - %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind - %imask = bitcast i16 %mask to <16 x i1> - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef) - ret <16 x float> %res -} - -define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) { -; KNL_64-LABEL: test3: -; KNL_64: # BB#0: -; KNL_64-NEXT: kmovw %esi, %k1 -; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1} -; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test3: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1} -; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test3: -; SKX: # BB#0: -; SKX-NEXT: kmovw %esi, %k1 -; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1} -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test3: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1} -; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 - %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer - - %sext_ind = sext <16 x i32> %ind to <16 x i64> - %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind - %imask = bitcast i16 %mask to <16 x i1> - %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) - ret <16 x i32> %res -} - - -define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) { -; KNL_64-LABEL: test4: -; KNL_64: # BB#0: -; KNL_64-NEXT: kmovw %esi, %k1 -; KNL_64-NEXT: kmovw %k1, %k2 -; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2} -; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2 -; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1} -; KNL_64-NEXT: vpaddd %zmm2, %zmm1, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test4: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; KNL_32-NEXT: kmovw %k1, %k2 -; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2} -; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2 -; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1} -; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test4: -; SKX: # BB#0: -; SKX-NEXT: kmovw %esi, %k1 -; SKX-NEXT: kmovw %k1, %k2 -; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2} -; SKX-NEXT: vmovdqa64 %zmm1, %zmm2 -; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1} -; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test4: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; SKX_32-NEXT: kmovw %k1, %k2 -; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2} -; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2 -; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1} -; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 - %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer - - %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind - %imask = bitcast i16 %mask to <16 x i1> - %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) - %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) - %res = add <16 x i32> %gt1, %gt2 - ret <16 x i32> %res -} - - -; SCALAR-LABEL: test5 -; SCALAR: %Mask0 = extractelement <16 x i1> %imask, i32 0 -; SCALAR-NEXT: %ToStore0 = icmp eq i1 %Mask0, true -; SCALAR-NEXT: br i1 %ToStore0, label %cond.store, label %else -; SCALAR: cond.store: -; SCALAR-NEXT: %Elt0 = extractelement <16 x i32> %val, i32 0 -; SCALAR-NEXT: %Ptr0 = extractelement <16 x i32*> %gep.random, i32 0 -; SCALAR-NEXT: store i32 %Elt0, i32* %Ptr0, align 4 -; SCALAR-NEXT: br label %else -; SCALAR: else: -; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1 -; SCALAR-NEXT: %ToStore1 = icmp eq i1 %Mask1, true -; SCALAR-NEXT: br i1 %ToStore1, label %cond.store1, label %else2 - -define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { -; KNL_64-LABEL: test5: -; KNL_64: # BB#0: -; KNL_64-NEXT: kmovw %esi, %k1 -; KNL_64-NEXT: kmovw %k1, %k2 -; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2} -; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test5: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; KNL_32-NEXT: kmovw %k1, %k2 -; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2} -; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test5: -; SKX: # BB#0: -; SKX-NEXT: kmovw %esi, %k1 -; SKX-NEXT: kmovw %k1, %k2 -; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2} -; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test5: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; SKX_32-NEXT: kmovw %k1, %k2 -; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2} -; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1} -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 - %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer - - %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind - %imask = bitcast i16 %mask to <16 x i1> - call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) - call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) - ret void -} - -declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> ) -declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> ) - - -; SCALAR-LABEL: test6 -; SCALAR: store i32 %Elt0, i32* %Ptr01, align 4 -; SCALAR-NEXT: %Elt1 = extractelement <8 x i32> %a1, i32 1 -; SCALAR-NEXT: %Ptr12 = extractelement <8 x i32*> %ptr, i32 1 -; SCALAR-NEXT: store i32 %Elt1, i32* %Ptr12, align 4 -; SCALAR-NEXT: %Elt2 = extractelement <8 x i32> %a1, i32 2 -; SCALAR-NEXT: %Ptr23 = extractelement <8 x i32*> %ptr, i32 2 -; SCALAR-NEXT: store i32 %Elt2, i32* %Ptr23, align 4 - -define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) { -; KNL_64-LABEL: test6: -; KNL_64: # BB#0: -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: kxnorw %k0, %k0, %k2 -; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} -; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; KNL_64-NEXT: vmovdqa %ymm2, %ymm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test6: -; KNL_32: # BB#0: -; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm2 -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: kxnorw %k0, %k0, %k2 -; KNL_32-NEXT: vpgatherqd (,%zmm2), %ymm1 {%k2} -; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm2) {%k1} -; KNL_32-NEXT: vmovdqa %ymm1, %ymm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test6: -; SKX: # BB#0: -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: kxnorw %k0, %k0, %k2 -; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} -; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; SKX-NEXT: vmovdqa %ymm2, %ymm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test6: -; SKX_32: # BB#0: -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: kxnorw %k0, %k0, %k2 -; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2} -; SKX_32-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1} -; SKX_32-NEXT: vmovdqa %ymm2, %ymm0 -; SKX_32-NEXT: retl - - %a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) - - call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) - ret <8 x i32>%a -} - -define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) { -; -; KNL_64-LABEL: test7: -; KNL_64: # BB#0: -; KNL_64-NEXT: kmovw %esi, %k1 -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: kmovw %k1, %k2 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k2} -; KNL_64-NEXT: vmovdqa %ymm1, %ymm2 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test7: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; KNL_32-NEXT: kmovw %ecx, %k1 -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: kmovw %k1, %k2 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2} -; KNL_32-NEXT: vmovdqa %ymm1, %ymm2 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test7: -; SKX: # BB#0: -; SKX-NEXT: kmovw %esi, %k1 -; SKX-NEXT: kmovw %k1, %k2 -; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2} -; SKX-NEXT: vmovdqa %ymm1, %ymm2 -; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1} -; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test7: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 -; SKX_32-NEXT: kmovw %k1, %k2 -; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2} -; SKX_32-NEXT: vmovdqa %ymm1, %ymm2 -; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1} -; SKX_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0 -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0 - %broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer - - %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind - %imask = bitcast i8 %mask to <8 x i1> - %gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef) - %gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1) - %res = add <8 x i32> %gt1, %gt2 - ret <8 x i32> %res -} - -; No uniform base in this case, index <8 x i64> contains addresses, -; each gather call will be split into two -define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) { -; KNL_64-LABEL: test8: -; KNL_64: # BB#0: -; KNL_64-NEXT: kmovw %edi, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: kmovw %k2, %k3 -; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3} -; KNL_64-NEXT: kmovw %k1, %k3 -; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3} -; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm4 -; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} -; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1} -; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0 -; KNL_64-NEXT: vpaddd %zmm0, %zmm4, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test8: -; KNL_32: # BB#0: -; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; KNL_32-NEXT: kmovw %k1, %k2 -; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2} -; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2 -; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} -; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test8: -; SKX: # BB#0: -; SKX-NEXT: kmovw %edi, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: kmovw %k2, %k3 -; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3} -; SKX-NEXT: kmovw %k1, %k3 -; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3} -; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm4 -; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} -; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1} -; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm0 -; SKX-NEXT: vpaddd %zmm0, %zmm4, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test8: -; SKX_32: # BB#0: -; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; SKX_32-NEXT: kmovw %k1, %k2 -; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2} -; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2 -; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} -; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %imask = bitcast i16 %mask to <16 x i1> - %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef) - %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) - %res = add <16 x i32> %gt1, %gt2 - ret <16 x i32> %res -} - -%struct.RT = type { i8, [10 x [20 x i32]], i8 } -%struct.ST = type { i32, double, %struct.RT } - -; Masked gather for agregate types -; Test9 and Test10 should give the same result (scalar and vector indices in GEP) - - -define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) { -; KNL_64-LABEL: test9: -; KNL_64: # BB#0: # %entry -; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2 -; KNL_64-NEXT: vpbroadcastq {{.*}}(%rip), %zmm3 -; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4 -; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0 -; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 -; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_64-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 -; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0 -; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test9: -; KNL_32: # BB#0: # %entry -; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2 -; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3 -; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1 -; KNL_32-NEXT: vpmovqd %zmm0, %ymm0 -; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3 -; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0 -; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1 -; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1 -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} -; KNL_32-NEXT: retl -; -; SKX-LABEL: test9: -; SKX: # BB#0: # %entry -; SKX-NEXT: vpbroadcastq %rdi, %zmm2 -; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; SKX-NEXT: vpmovsxdq %ymm1, %zmm1 -; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1 -; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0 -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} -; SKX-NEXT: retq -; -; SKX_32-LABEL: test9: -; SKX_32: # BB#0: # %entry -; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1 -; SKX_32-NEXT: vpmovqd %zmm0, %ymm0 -; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 -; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0 -; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1 -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1} -; SKX_32-NEXT: retl -entry: - %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0 - %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer - - %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13> - %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) - ret <8 x i32> %res -} - -define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) { -; KNL_64-LABEL: test10: -; KNL_64: # BB#0: # %entry -; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2 -; KNL_64-NEXT: vpbroadcastq {{.*}}(%rip), %zmm3 -; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4 -; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0 -; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 -; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_64-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 -; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0 -; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test10: -; KNL_32: # BB#0: # %entry -; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2 -; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3 -; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1 -; KNL_32-NEXT: vpmovqd %zmm0, %ymm0 -; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3 -; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0 -; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1 -; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1 -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} -; KNL_32-NEXT: retl -; -; SKX-LABEL: test10: -; SKX: # BB#0: # %entry -; SKX-NEXT: vpbroadcastq %rdi, %zmm2 -; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; SKX-NEXT: vpmovsxdq %ymm1, %zmm1 -; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1 -; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0 -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} -; SKX-NEXT: retq -; -; SKX_32-LABEL: test10: -; SKX_32: # BB#0: # %entry -; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1 -; SKX_32-NEXT: vpmovqd %zmm0, %ymm0 -; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 -; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0 -; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1 -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1} -; SKX_32-NEXT: retl -entry: - %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0 - %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer - - %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13 - %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) - ret <8 x i32> %res -} - -; Splat index in GEP, requires broadcast -define <16 x float> @test11(float* %base, i32 %ind) { -; KNL_64-LABEL: test11: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpbroadcastd %esi, %zmm1 -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test11: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1 -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} -; KNL_32-NEXT: retl -; -; SKX-LABEL: test11: -; SKX: # BB#0: -; SKX-NEXT: vpbroadcastd %esi, %zmm1 -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} -; SKX-NEXT: retq -; -; SKX_32-LABEL: test11: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1 -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 - %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer - - %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind - - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) - ret <16 x float>%res -} - -; We are checking the uniform base here. It is taken directly from input to vgatherdps -define <16 x float> @test12(float* %base, <16 x i32> %ind) { -; KNL_64-LABEL: test12: -; KNL_64: # BB#0: -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; KNL_64-NEXT: vmovaps %zmm1, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test12: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; KNL_32-NEXT: vmovaps %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test12: -; SKX: # BB#0: -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test12: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %sext_ind = sext <16 x i32> %ind to <16 x i64> - %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind - - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) - ret <16 x float>%res -} - -; The same as the previous, but the mask is undefined -define <16 x float> @test13(float* %base, <16 x i32> %ind) { -; KNL_64-LABEL: test13: -; KNL_64: # BB#0: -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; KNL_64-NEXT: vmovaps %zmm1, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test13: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; KNL_32-NEXT: vmovaps %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test13: -; SKX: # BB#0: -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test13: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %sext_ind = sext <16 x i32> %ind to <16 x i64> - %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind - - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) - ret <16 x float>%res -} - -; The base pointer is not splat, can't find unform base -define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) { -; KNL_64-LABEL: test14: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 -; KNL_64-NEXT: vpbroadcastq %xmm0, %zmm0 -; KNL_64-NEXT: vmovd %esi, %xmm1 -; KNL_64-NEXT: vpbroadcastd %xmm1, %ymm1 -; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_64-NEXT: vpsllq $2, %zmm1, %zmm1 -; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm1 {%k2} -; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm2 {%k1} -; KNL_64-NEXT: vinsertf64x4 $1, %ymm1, %zmm2, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test14: -; KNL_32: # BB#0: -; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; KNL_32-NEXT: vpbroadcastd %xmm0, %zmm0 -; KNL_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1 -; KNL_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vgatherdps (,%zmm1), %zmm0 {%k1} -; KNL_32-NEXT: retl -; -; SKX-LABEL: test14: -; SKX: # BB#0: -; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 -; SKX-NEXT: vpbroadcastq %xmm0, %zmm0 -; SKX-NEXT: vpbroadcastd %esi, %ymm1 -; SKX-NEXT: vpmovsxdq %ymm1, %zmm1 -; SKX-NEXT: vpsllq $2, %zmm1, %zmm1 -; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vgatherqps (,%zmm0), %ymm1 {%k2} -; SKX-NEXT: vgatherqps (,%zmm0), %ymm2 {%k1} -; SKX-NEXT: vinsertf32x8 $1, %ymm1, %zmm2, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test14: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; SKX_32-NEXT: vpbroadcastd %xmm0, %zmm0 -; SKX_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1 -; SKX_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vgatherdps (,%zmm1), %zmm0 {%k1} -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x float*> %vec, float* %base, i32 1 - %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer - - %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind - - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) - ret <16 x float>%res -} - -declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>) -declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>) -declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>) - -; Gather smaller than existing instruction -define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { -; -; KNL_64-LABEL: test15: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2 -; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1} -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test15: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_32-NEXT: vpxor %ymm2, %ymm2, %ymm2 -; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1} -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test15: -; SKX: # BB#0: -; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1} -; SKX-NEXT: vmovaps %xmm1, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test15: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1} -; SKX_32-NEXT: vmovaps %xmm1, %xmm0 -; SKX_32-NEXT: retl - - %sext_ind = sext <4 x i32> %ind to <4 x i64> - %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind - %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef) - ret <4 x float>%res -} - -; Gather smaller than existing instruction -define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x double> %src0) { -; -; KNL_64-LABEL: test16: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_64-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1 -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} -; KNL_64-NEXT: vmovapd %ymm2, %ymm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test16: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1} -; KNL_32-NEXT: vmovapd %ymm2, %ymm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test16: -; SKX: # BB#0: -; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1} -; SKX-NEXT: vmovapd %ymm2, %ymm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test16: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1} -; SKX_32-NEXT: vmovapd %ymm2, %ymm0 -; SKX_32-NEXT: retl - - %sext_ind = sext <4 x i32> %ind to <4 x i64> - %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind - %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0) - ret <4 x double>%res -} - -define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x double> %src0) { -; -; KNL_64-LABEL: test17: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} -; KNL_64-NEXT: vmovapd %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test17: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1} -; KNL_32-NEXT: vmovapd %xmm2, %xmm0 -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test17: -; SKX: # BB#0: -; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1} -; SKX-NEXT: vmovapd %xmm2, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test17: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1} -; SKX_32-NEXT: vmovapd %xmm2, %xmm0 -; SKX_32-NEXT: retl - - %sext_ind = sext <2 x i32> %ind to <2 x i64> - %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind - %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0) - ret <2 x double>%res -} - -declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> ) -declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> , <4 x double*> , i32 , <4 x i1> ) -declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> ) -declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) -declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> , <2 x float*> , i32 , <2 x i1> ) - -define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { -; -; KNL_64-LABEL: test18: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def> -; KNL_64-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7] -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test18: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def> -; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7] -; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test18: -; SKX: # BB#0: -; SKX-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1 -; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test18: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1 -; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1} -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) - ret void -} - -define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind) { -; -; KNL_64-LABEL: test19: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def> -; KNL_64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> -; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_64-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test19: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def> -; KNL_32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> -; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test19: -; SKX: # BB#0: -; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX-NEXT: vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test19: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1} -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - %gep = getelementptr double, double* %ptr, <4 x i64> %ind - call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask) - ret void -} - -; Data type requires widening -define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) { -; -; KNL_64-LABEL: test20: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7] -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test20: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_32-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7] -; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_32-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test20: -; SKX: # BB#0: -; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 -; SKX-NEXT: vscatterqps %xmm0, (,%ymm1) {%k1} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test20: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 -; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1} -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask) - ret void -} - -; Data type requires promotion -define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) { -; -; KNL_64-LABEL: test21: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def> -; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2 -; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1 -; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test21: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def> -; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_32-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2 -; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 -; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test21: -; SKX: # BB#0: -; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test21: -; SKX_32: # BB#0: -; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 -; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask) - ret void -} - -; The result type requires widening -declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>) - -define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) { -; -; -; KNL_64-LABEL: test22: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def> -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7] -; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vmovaps %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test22: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def> -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7] -; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vmovaps %xmm2, %xmm0 -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test22: -; SKX: # BB#0: -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 -; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1} -; SKX-NEXT: vmovaps %xmm2, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test22: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1} -; SKX_32-NEXT: vmovaps %xmm2, %xmm0 -; SKX_32-NEXT: retl - %sext_ind = sext <2 x i32> %ind to <2 x i64> - %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind - %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0) - ret <2 x float>%res -} - -define <2 x float> @test22a(float* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x float> %src0) { -; KNL_64-LABEL: test22a: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7] -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vmovaps %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test22a: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3 -; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7] -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vmovaps %xmm2, %xmm0 -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test22a: -; SKX: # BB#0: -; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1} -; SKX-NEXT: vmovaps %xmm2, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test22a: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1} -; SKX_32-NEXT: vmovaps %xmm2, %xmm0 -; SKX_32-NEXT: retl - %gep.random = getelementptr float, float* %base, <2 x i64> %ind - %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0) - ret <2 x float>%res -} - -declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) -declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>) - -define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) { -; -; KNL_64-LABEL: test23: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1} -; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test23: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1} -; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test23: -; SKX: # BB#0: -; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1} -; SKX-NEXT: vpmovsxdq %xmm1, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test23: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1} -; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0 -; SKX_32-NEXT: retl - %sext_ind = sext <2 x i32> %ind to <2 x i64> - %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind - %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0) - ret <2 x i32>%res -} - -define <2 x i32> @test24(i32* %base, <2 x i32> %ind) { -; KNL_64-LABEL: test24: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_64-NEXT: movb $3, %al -; KNL_64-NEXT: kmovw %eax, %k1 -; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1} -; KNL_64-NEXT: vmovdqa %xmm1, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test24: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpxord %zmm1, %zmm1, %zmm1 -; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm1, %zmm1 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1} -; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test24: -; SKX: # BB#0: -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1} -; SKX-NEXT: vpmovsxdq %xmm1, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test24: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1} -; SKX_32-NEXT: vpmovsxdq %xmm1, %xmm0 -; SKX_32-NEXT: retl - %sext_ind = sext <2 x i32> %ind to <2 x i64> - %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind - %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) - ret <2 x i32>%res -} - -define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %src0) { -; -; KNL_64-LABEL: test25: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1} -; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test25: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3 -; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1} -; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test25: -; SKX: # BB#0: -; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1} -; SKX-NEXT: vmovdqa %xmm2, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test25: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1} -; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 -; SKX_32-NEXT: retl - %sext_ind = sext <2 x i32> %ind to <2 x i64> - %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind - %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0) - ret <2 x i64>%res -} - -define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) { -; -; KNL_64-LABEL: test26: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def> -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_64-NEXT: movb $3, %al -; KNL_64-NEXT: kmovw %eax, %k1 -; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1} -; KNL_64-NEXT: vmovdqa %xmm1, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test26: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def> -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 -; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1} -; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test26: -; SKX: # BB#0: -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1} -; SKX-NEXT: vmovdqa %xmm1, %xmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test26: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1} -; SKX_32-NEXT: vmovdqa %xmm1, %xmm0 -; SKX_32-NEXT: retl - %sext_ind = sext <2 x i32> %ind to <2 x i64> - %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind - %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0) - ret <2 x i64>%res -} - -; Result type requires widening; all-ones mask -define <2 x float> @test27(float* %base, <2 x i32> %ind) { -; -; KNL_64-LABEL: test27: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1 -; KNL_64-NEXT: movb $3, %al -; KNL_64-NEXT: kmovw %eax, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} -; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test27: -; KNL_32: # BB#0: -; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1 -; KNL_32-NEXT: movb $3, %cl -; KNL_32-NEXT: kmovw %ecx, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test27: -; SKX: # BB#0: -; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] -; SKX-NEXT: movb $3, %al -; SKX-NEXT: kmovw %eax, %k1 -; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1} -; SKX-NEXT: retq -; -; SKX_32-LABEL: test27: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: movb $3, %cl -; SKX_32-NEXT: kmovw %ecx, %k1 -; SKX_32-NEXT: vgatherdps (%eax,%xmm1,4), %xmm0 {%k1} -; SKX_32-NEXT: retl - %sext_ind = sext <2 x i32> %ind to <2 x i64> - %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind - %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef) - ret <2 x float>%res -} - -; Data type requires promotion, mask is all-ones -define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) { -; -; -; KNL_64-LABEL: test28: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def> -; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: movb $3, %al -; KNL_64-NEXT: kmovw %eax, %k1 -; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test28: -; KNL_32: # BB#0: -; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def> -; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 -; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test28: -; SKX: # BB#0: -; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; SKX-NEXT: movb $3, %al -; SKX-NEXT: kmovw %eax, %k1 -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test28: -; SKX_32: # BB#0: -; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> -; SKX_32-NEXT: movb $3, %al -; SKX_32-NEXT: kmovw %eax, %k1 -; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>) - ret void -} - - -; SCALAR-LABEL: test29 -; SCALAR: extractelement <16 x float*> -; SCALAR-NEXT: load float -; SCALAR-NEXT: insertelement <16 x float> -; SCALAR-NEXT: extractelement <16 x float*> -; SCALAR-NEXT: load float - -define <16 x float> @test29(float* %base, <16 x i32> %ind) { -; KNL_64-LABEL: test29: -; KNL_64: # BB#0: -; KNL_64-NEXT: movw $44, %ax -; KNL_64-NEXT: kmovw %eax, %k1 -; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; KNL_64-NEXT: vmovaps %zmm1, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test29: -; KNL_32: # BB#0: -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: movw $44, %cx -; KNL_32-NEXT: kmovw %ecx, %k1 -; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; KNL_32-NEXT: vmovaps %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test29: -; SKX: # BB#0: -; SKX-NEXT: movw $44, %ax -; SKX-NEXT: kmovw %eax, %k1 -; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test29: -; SKX_32: # BB#0: -; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: movw $44, %cx -; SKX_32-NEXT: kmovw %ecx, %k1 -; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} -; SKX_32-NEXT: vmovaps %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 - %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer - - %sext_ind = sext <16 x i32> %ind to <16 x i64> - %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind - - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef) - ret <16 x float>%res -} - -; Check non-power-of-2 case. It should be scalarized. -declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>) -define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { -; ALL-LABEL: test30 -; ALL-NOT: gather - - %sext_ind = sext <3 x i32> %ind to <3 x i64> - %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind - %res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0) - ret <3 x i32>%res -} - -declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>) - -; KNL-LABEL: test31 -; KNL: vpgatherqq -; KNL: vpgatherqq -define <16 x float*> @test31(<16 x float**> %ptrs) { -; KNL_64-LABEL: test31: -; KNL_64: # BB#0: -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: kxnorw %k0, %k0, %k2 -; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2} -; KNL_64-NEXT: kshiftrw $8, %k1, %k1 -; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1} -; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0 -; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm1 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test31: -; KNL_32: # BB#0: -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1} -; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test31: -; SKX: # BB#0: -; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: kxnorw %k0, %k0, %k2 -; SKX-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2} -; SKX-NEXT: kshiftrw $8, %k1, %k1 -; SKX-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1} -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 -; SKX-NEXT: vmovdqa64 %zmm3, %zmm1 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test31: -; SKX_32: # BB#0: -; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1} -; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0 -; SKX_32-NEXT: retl - - %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef) - ret <16 x float*>%res -} - -define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { -; KNL_64-LABEL: test_gather_16i32: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm2 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} -; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1} -; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_gather_16i32: -; KNL_32: # BB#0: -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} -; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_gather_16i32: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: vextracti32x8 $1, %zmm3, %ymm2 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} -; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1} -; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_gather_16i32: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} -; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0 -; SKX_32-NEXT: retl - %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0) - ret <16 x i32> %res -} -define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) { -; KNL_64-LABEL: test_gather_16i64: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1} -; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2} -; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm0 -; KNL_64-NEXT: vmovdqa64 %zmm4, %zmm1 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_gather_16i64: -; KNL_32: # BB#0: -; KNL_32-NEXT: pushl %ebp -; KNL_32-NEXT: .Lcfi0: -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .Lcfi1: -; KNL_32-NEXT: .cfi_offset %ebp, -8 -; KNL_32-NEXT: movl %esp, %ebp -; KNL_32-NEXT: .Lcfi2: -; KNL_32-NEXT: .cfi_def_cfa_register %ebp -; KNL_32-NEXT: andl $-64, %esp -; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 -; KNL_32-NEXT: kshiftrw $8, %k1, %k2 -; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1} -; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2} -; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0 -; KNL_32-NEXT: movl %ebp, %esp -; KNL_32-NEXT: popl %ebp -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_gather_16i64: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1} -; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2} -; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 -; SKX-NEXT: vmovdqa64 %zmm4, %zmm1 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_gather_16i64: -; SKX_32: # BB#0: -; SKX_32-NEXT: pushl %ebp -; SKX_32-NEXT: .Lcfi1: -; SKX_32-NEXT: .cfi_def_cfa_offset 8 -; SKX_32-NEXT: .Lcfi2: -; SKX_32-NEXT: .cfi_offset %ebp, -8 -; SKX_32-NEXT: movl %esp, %ebp -; SKX_32-NEXT: .Lcfi3: -; SKX_32-NEXT: .cfi_def_cfa_register %ebp -; SKX_32-NEXT: andl $-64, %esp -; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 -; SKX_32-NEXT: kshiftrw $8, %k1, %k2 -; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1} -; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 -; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2} -; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0 -; SKX_32-NEXT: movl %ebp, %esp -; SKX_32-NEXT: popl %ebp -; SKX_32-NEXT: retl - %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0) - ret <16 x i64> %res -} -declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0) -define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { -; KNL_64-LABEL: test_gather_16f32: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm2 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2} -; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1} -; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm3, %zmm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_gather_16f32: -; KNL_32: # BB#0: -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1} -; KNL_32-NEXT: vmovaps %zmm2, %zmm0 -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_gather_16f32: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: vextractf32x8 $1, %zmm3, %ymm2 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2} -; SKX-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1} -; SKX-NEXT: vinsertf32x8 $1, %ymm2, %zmm3, %zmm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_gather_16f32: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1} -; SKX_32-NEXT: vmovaps %zmm2, %zmm0 -; SKX_32-NEXT: retl - %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0) - ret <16 x float> %res -} -define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { -; KNL_64-LABEL: test_gather_16f64: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1} -; KNL_64-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} -; KNL_64-NEXT: vmovapd %zmm3, %zmm0 -; KNL_64-NEXT: vmovapd %zmm4, %zmm1 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_gather_16f64: -; KNL_32: # BB#0: -; KNL_32-NEXT: pushl %ebp -; KNL_32-NEXT: .Lcfi3: -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .Lcfi4: -; KNL_32-NEXT: .cfi_offset %ebp, -8 -; KNL_32-NEXT: movl %esp, %ebp -; KNL_32-NEXT: .Lcfi5: -; KNL_32-NEXT: .cfi_def_cfa_register %ebp -; KNL_32-NEXT: andl $-64, %esp -; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 -; KNL_32-NEXT: kshiftrw $8, %k1, %k2 -; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} -; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} -; KNL_32-NEXT: vmovapd %zmm2, %zmm0 -; KNL_32-NEXT: movl %ebp, %esp -; KNL_32-NEXT: popl %ebp -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_gather_16f64: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1} -; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} -; SKX-NEXT: vmovapd %zmm3, %zmm0 -; SKX-NEXT: vmovapd %zmm4, %zmm1 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_gather_16f64: -; SKX_32: # BB#0: -; SKX_32-NEXT: pushl %ebp -; SKX_32-NEXT: .Lcfi4: -; SKX_32-NEXT: .cfi_def_cfa_offset 8 -; SKX_32-NEXT: .Lcfi5: -; SKX_32-NEXT: .cfi_offset %ebp, -8 -; SKX_32-NEXT: movl %esp, %ebp -; SKX_32-NEXT: .Lcfi6: -; SKX_32-NEXT: .cfi_def_cfa_register %ebp -; SKX_32-NEXT: andl $-64, %esp -; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 -; SKX_32-NEXT: kshiftrw $8, %k1, %k2 -; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} -; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 -; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} -; SKX_32-NEXT: vmovapd %zmm2, %zmm0 -; SKX_32-NEXT: movl %ebp, %esp -; SKX_32-NEXT: popl %ebp -; SKX_32-NEXT: retl - %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0) - ret <16 x double> %res -} -declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0) -define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { -; KNL_64-LABEL: test_scatter_16i32: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1} -; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm0 -; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_scatter_16i32: -; KNL_32: # BB#0: -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_scatter_16i32: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1} -; SKX-NEXT: vextracti32x8 $1, %zmm3, %ymm0 -; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_scatter_16i32: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask) - ret void -} -define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) { -; KNL_64-LABEL: test_scatter_16i64: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} -; KNL_64-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_scatter_16i64: -; KNL_32: # BB#0: -; KNL_32-NEXT: pushl %ebp -; KNL_32-NEXT: .Lcfi6: -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .Lcfi7: -; KNL_32-NEXT: .cfi_offset %ebp, -8 -; KNL_32-NEXT: movl %esp, %ebp -; KNL_32-NEXT: .Lcfi8: -; KNL_32-NEXT: .cfi_def_cfa_register %ebp -; KNL_32-NEXT: andl $-64, %esp -; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 -; KNL_32-NEXT: kshiftrw $8, %k1, %k2 -; KNL_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} -; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} -; KNL_32-NEXT: movl %ebp, %esp -; KNL_32-NEXT: popl %ebp -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_scatter_16i64: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} -; SKX-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_scatter_16i64: -; SKX_32: # BB#0: -; SKX_32-NEXT: pushl %ebp -; SKX_32-NEXT: .Lcfi7: -; SKX_32-NEXT: .cfi_def_cfa_offset 8 -; SKX_32-NEXT: .Lcfi8: -; SKX_32-NEXT: .cfi_offset %ebp, -8 -; SKX_32-NEXT: movl %esp, %ebp -; SKX_32-NEXT: .Lcfi9: -; SKX_32-NEXT: .cfi_def_cfa_register %ebp -; SKX_32-NEXT: andl $-64, %esp -; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 -; SKX_32-NEXT: kshiftrw $8, %k1, %k2 -; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} -; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 -; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} -; SKX_32-NEXT: movl %ebp, %esp -; SKX_32-NEXT: popl %ebp -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask) - ret void -} -declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask) -define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { -; KNL_64-LABEL: test_scatter_16f32: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1} -; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm0 -; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_scatter_16f32: -; KNL_32: # BB#0: -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1} -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_scatter_16f32: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1} -; SKX-NEXT: vextractf32x8 $1, %zmm3, %ymm0 -; SKX-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_scatter_16f32: -; SKX_32: # BB#0: -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1} -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask) - ret void -} -declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask) -define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { -; KNL_64-LABEL: test_scatter_16f64: -; KNL_64: # BB#0: -; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: kshiftrw $8, %k1, %k2 -; KNL_64-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} -; KNL_64-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_scatter_16f64: -; KNL_32: # BB#0: -; KNL_32-NEXT: pushl %ebp -; KNL_32-NEXT: .Lcfi9: -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .Lcfi10: -; KNL_32-NEXT: .cfi_offset %ebp, -8 -; KNL_32-NEXT: movl %esp, %ebp -; KNL_32-NEXT: .Lcfi11: -; KNL_32-NEXT: .cfi_def_cfa_register %ebp -; KNL_32-NEXT: andl $-64, %esp -; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 -; KNL_32-NEXT: kshiftrw $8, %k1, %k2 -; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} -; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} -; KNL_32-NEXT: movl %ebp, %esp -; KNL_32-NEXT: popl %ebp -; KNL_32-NEXT: vzeroupper -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_scatter_16f64: -; SKX: # BB#0: -; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 -; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 -; SKX-NEXT: kshiftrw $8, %k1, %k2 -; SKX-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} -; SKX-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_scatter_16f64: -; SKX_32: # BB#0: -; SKX_32-NEXT: pushl %ebp -; SKX_32-NEXT: .Lcfi10: -; SKX_32-NEXT: .cfi_def_cfa_offset 8 -; SKX_32-NEXT: .Lcfi11: -; SKX_32-NEXT: .cfi_offset %ebp, -8 -; SKX_32-NEXT: movl %esp, %ebp -; SKX_32-NEXT: .Lcfi12: -; SKX_32-NEXT: .cfi_def_cfa_register %ebp -; SKX_32-NEXT: andl $-64, %esp -; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 -; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 -; SKX_32-NEXT: kshiftrw $8, %k1, %k2 -; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} -; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 -; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} -; SKX_32-NEXT: movl %ebp, %esp -; SKX_32-NEXT: popl %ebp -; SKX_32-NEXT: vzeroupper -; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask) - ret void -} -declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask) - -define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) { -; KNL_64-LABEL: test_pr28312: -; KNL_64: # BB#0: -; KNL_64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> -; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; KNL_64-NEXT: vinserti64x4 $0, %ymm1, %zmm2, %zmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} -; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0 -; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 -; KNL_64-NEXT: retq -; -; KNL_32-LABEL: test_pr28312: -; KNL_32: # BB#0: -; KNL_32-NEXT: pushl %ebp -; KNL_32-NEXT: .Lcfi12: -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .Lcfi13: -; KNL_32-NEXT: .cfi_offset %ebp, -8 -; KNL_32-NEXT: movl %esp, %ebp -; KNL_32-NEXT: .Lcfi14: -; KNL_32-NEXT: .cfi_def_cfa_register %ebp -; KNL_32-NEXT: andl $-32, %esp -; KNL_32-NEXT: subl $32, %esp -; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> -; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2 -; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm2, %zmm1 -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} -; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 -; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 -; KNL_32-NEXT: movl %ebp, %esp -; KNL_32-NEXT: popl %ebp -; KNL_32-NEXT: retl -; -; SKX-LABEL: test_pr28312: -; SKX: # BB#0: -; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX-NEXT: vpgatherqq (,%ymm0), %ymm1 {%k1} -; SKX-NEXT: vpaddq %ymm1, %ymm1, %ymm0 -; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 -; SKX-NEXT: retq -; -; SKX_32-LABEL: test_pr28312: -; SKX_32: # BB#0: -; SKX_32-NEXT: pushl %ebp -; SKX_32-NEXT: .Lcfi13: -; SKX_32-NEXT: .cfi_def_cfa_offset 8 -; SKX_32-NEXT: .Lcfi14: -; SKX_32-NEXT: .cfi_offset %ebp, -8 -; SKX_32-NEXT: movl %esp, %ebp -; SKX_32-NEXT: .Lcfi15: -; SKX_32-NEXT: .cfi_def_cfa_register %ebp -; SKX_32-NEXT: andl $-32, %esp -; SKX_32-NEXT: subl $32, %esp -; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 -; SKX_32-NEXT: vpgatherdq (,%xmm0), %ymm1 {%k1} -; SKX_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 -; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 -; SKX_32-NEXT: movl %ebp, %esp -; SKX_32-NEXT: popl %ebp -; SKX_32-NEXT: retl - %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) - %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) - %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) - %a = add <4 x i64> %g1, %g2 - %b = add <4 x i64> %a, %g3 - ret <4 x i64> %b -} -declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) |
