author     Dimitry Andric <dim@FreeBSD.org>   2019-01-19 10:01:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2019-01-19 10:01:25 +0000
commit     d8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree       7d0c143d9b38190e0fa0180805389da22cd834c5 /test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
parent     b7eb8e35e481a74962664b63dfb09483b200209a (diff)
Vendor import of llvm trunk r351319 (just before the release_80 branch)  (tag: vendor/llvm/llvm-trunk-r351319)
Notes:
    svn path=/vendor/llvm/dist/; revision=343171
    svn path=/vendor/llvm/llvm-trunk-r351319/; revision=343172; tag=vendor/llvm/llvm-trunk-r351319
Diffstat (limited to 'test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll')
-rw-r--r--  test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll | 128
1 file changed, 56 insertions(+), 72 deletions(-)
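
What the patch shows: upstream LLVM retired the X86-specific AVX-512 rotate intrinsics (llvm.x86.avx512.prol.*/pror.* for immediate counts, prolv.*/prorv.* for variable counts) in favor of the target-independent funnel-shift intrinsics llvm.fshl and llvm.fshr, and this test file was updated to match. A rotate is the funnel shift whose two data operands are the same value: rol(x, n) == fshl(x, x, n) and ror(x, n) == fshr(x, x, n). As a minimal standalone sketch (our own example, not part of the patch; the name @rol5 is hypothetical), rotating each 32-bit lane left by 5 is written:

; fshl(a, b, n) shifts the double-width value a:b left by n and keeps the
; high half, so passing the same value for a and b yields a rotate left.
define <4 x i32> @rol5(<4 x i32> %x) {
  %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
  ret <4 x i32> %r
}
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

On an AVX512VL target this form should still select a single vprold, which is why the CHECK lines in the hunks below stay unchanged while the IR they exercise is rewritten.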
diff --git a/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
index 6c1cfa8fc093..8b513f0a29cd 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
@@ -6626,13 +6626,11 @@ define <2 x i64> @test_mm_rol_epi32(<2 x i64> %__A) {
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
- %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5)
+ %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <4 x i32> %1 to <2 x i64>
ret <2 x i64> %2
}
-declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32)
-
define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_rol_epi32:
; X86: # %bb.0: # %entry
@@ -6648,7 +6646,7 @@ define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
- %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5)
+ %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <2 x i64> %__W to <4 x i32>
%3 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -6672,7 +6670,7 @@ define <2 x i64> @test_mm_maskz_rol_epi32(i8 zeroext %__U, <2 x i64> %__A) {
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
- %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5)
+ %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
@@ -6687,13 +6685,11 @@ define <4 x i64> @test_mm256_rol_epi32(<4 x i64> %__A) {
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
- %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5)
+ %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <8 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
-declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32)
-
define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_rol_epi32:
; X86: # %bb.0: # %entry
@@ -6709,7 +6705,7 @@ define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
- %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5)
+ %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <4 x i64> %__W to <8 x i32>
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
@@ -6732,7 +6728,7 @@ define <4 x i64> @test_mm256_maskz_rol_epi32(i8 zeroext %__U, <4 x i64> %__A) {
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
- %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5)
+ %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast i8 %__U to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
%4 = bitcast <8 x i32> %3 to <4 x i64>
@@ -6745,12 +6741,10 @@ define <2 x i64> @test_mm_rol_epi64(<2 x i64> %__A) {
; CHECK-NEXT: vprolq $5, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5)
+ %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
ret <2 x i64> %0
}
-declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32)
-
define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_rol_epi64:
; X86: # %bb.0: # %entry
@@ -6765,7 +6759,7 @@ define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i
; X64-NEXT: vprolq $5, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5)
+ %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W
@@ -6786,7 +6780,7 @@ define <2 x i64> @test_mm_maskz_rol_epi64(i8 zeroext %__U, <2 x i64> %__A) {
; X64-NEXT: vprolq $5, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5)
+ %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
@@ -6799,12 +6793,10 @@ define <4 x i64> @test_mm256_rol_epi64(<4 x i64> %__A) {
; CHECK-NEXT: vprolq $5, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5)
+ %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
ret <4 x i64> %0
}
-declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32)
-
define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_rol_epi64:
; X86: # %bb.0: # %entry
@@ -6819,7 +6811,7 @@ define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4
; X64-NEXT: vprolq $5, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5)
+ %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W
@@ -6840,7 +6832,7 @@ define <4 x i64> @test_mm256_maskz_rol_epi64(i8 zeroext %__U, <4 x i64> %__A) {
; X64-NEXT: vprolq $5, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5)
+ %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
@@ -6855,7 +6847,7 @@ define <2 x i64> @test_mm_rolv_epi32(<2 x i64> %__A, <2 x i64> %__B) {
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1)
+ %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
%3 = bitcast <4 x i32> %2 to <2 x i64>
ret <2 x i64> %3
}
@@ -6876,7 +6868,7 @@ define <2 x i64> @test_mm_mask_rolv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1)
+ %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
%3 = bitcast <2 x i64> %__W to <4 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -6901,7 +6893,7 @@ define <2 x i64> @test_mm_maskz_rolv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1)
+ %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
%3 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer
@@ -6917,7 +6909,7 @@ define <4 x i64> @test_mm256_rolv_epi32(<4 x i64> %__A, <4 x i64> %__B) {
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1)
+ %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
%3 = bitcast <8 x i32> %2 to <4 x i64>
ret <4 x i64> %3
}
@@ -6938,7 +6930,7 @@ define <4 x i64> @test_mm256_mask_rolv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1)
+ %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
%3 = bitcast <4 x i64> %__W to <8 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
@@ -6962,7 +6954,7 @@ define <4 x i64> @test_mm256_maskz_rolv_epi32(i8 zeroext %__U, <4 x i64> %__A, <
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1)
+ %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
%5 = bitcast <8 x i32> %4 to <4 x i64>
@@ -6975,7 +6967,7 @@ define <2 x i64> @test_mm_rolv_epi64(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B)
+ %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B)
ret <2 x i64> %0
}
@@ -6993,7 +6985,7 @@ define <2 x i64> @test_mm_mask_rolv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x
; X64-NEXT: vprolvq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B)
+ %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
@@ -7014,7 +7006,7 @@ define <2 x i64> @test_mm_maskz_rolv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x
; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B)
+ %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
@@ -7027,7 +7019,7 @@ define <4 x i64> @test_mm256_rolv_epi64(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B)
+ %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B)
ret <4 x i64> %0
}
@@ -7045,7 +7037,7 @@ define <4 x i64> @test_mm256_mask_rolv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4
; X64-NEXT: vprolvq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B)
+ %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
@@ -7066,7 +7058,7 @@ define <4 x i64> @test_mm256_maskz_rolv_epi64(i8 zeroext %__U, <4 x i64> %__A, <
; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B)
+ %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
@@ -7080,13 +7072,11 @@ define <2 x i64> @test_mm_ror_epi32(<2 x i64> %__A) {
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
- %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5)
+ %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <4 x i32> %1 to <2 x i64>
ret <2 x i64> %2
}
-declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32)
-
define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_ror_epi32:
; X86: # %bb.0: # %entry
@@ -7102,7 +7092,7 @@ define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
- %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5)
+ %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <2 x i64> %__W to <4 x i32>
%3 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -7126,7 +7116,7 @@ define <2 x i64> @test_mm_maskz_ror_epi32(i8 zeroext %__U, <2 x i64> %__A) {
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
- %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5)
+ %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
@@ -7141,13 +7131,11 @@ define <4 x i64> @test_mm256_ror_epi32(<4 x i64> %__A) {
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
- %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5)
+ %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <8 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
-declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32)
-
define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_ror_epi32:
; X86: # %bb.0: # %entry
@@ -7163,7 +7151,7 @@ define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
- %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5)
+ %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast <4 x i64> %__W to <8 x i32>
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
@@ -7186,7 +7174,7 @@ define <4 x i64> @test_mm256_maskz_ror_epi32(i8 zeroext %__U, <4 x i64> %__A) {
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
- %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5)
+ %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
%2 = bitcast i8 %__U to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
%4 = bitcast <8 x i32> %3 to <4 x i64>
@@ -7199,12 +7187,10 @@ define <2 x i64> @test_mm_ror_epi64(<2 x i64> %__A) {
; CHECK-NEXT: vprorq $5, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5)
+ %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
ret <2 x i64> %0
}
-declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32)
-
define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_ror_epi64:
; X86: # %bb.0: # %entry
@@ -7219,7 +7205,7 @@ define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i
; X64-NEXT: vprorq $5, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5)
+ %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W
@@ -7240,7 +7226,7 @@ define <2 x i64> @test_mm_maskz_ror_epi64(i8 zeroext %__U, <2 x i64> %__A) {
; X64-NEXT: vprorq $5, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5)
+ %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
@@ -7253,12 +7239,10 @@ define <4 x i64> @test_mm256_ror_epi64(<4 x i64> %__A) {
; CHECK-NEXT: vprorq $5, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5)
+ %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
ret <4 x i64> %0
}
-declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32)
-
define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_ror_epi64:
; X86: # %bb.0: # %entry
@@ -7273,7 +7257,7 @@ define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4
; X64-NEXT: vprorq $5, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5)
+ %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W
@@ -7294,7 +7278,7 @@ define <4 x i64> @test_mm256_maskz_ror_epi64(i8 zeroext %__U, <4 x i64> %__A) {
; X64-NEXT: vprorq $5, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5)
+ %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
@@ -7309,7 +7293,7 @@ define <2 x i64> @test_mm_rorv_epi32(<2 x i64> %__A, <2 x i64> %__B) {
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1)
+ %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
%3 = bitcast <4 x i32> %2 to <2 x i64>
ret <2 x i64> %3
}
@@ -7330,7 +7314,7 @@ define <2 x i64> @test_mm_mask_rorv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1)
+ %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
%3 = bitcast <2 x i64> %__W to <4 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -7355,7 +7339,7 @@ define <2 x i64> @test_mm_maskz_rorv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1)
+ %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1)
%3 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer
@@ -7371,7 +7355,7 @@ define <4 x i64> @test_mm256_rorv_epi32(<4 x i64> %__A, <4 x i64> %__B) {
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1)
+ %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
%3 = bitcast <8 x i32> %2 to <4 x i64>
ret <4 x i64> %3
}
@@ -7392,7 +7376,7 @@ define <4 x i64> @test_mm256_mask_rorv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1)
+ %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
%3 = bitcast <4 x i64> %__W to <8 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
@@ -7416,7 +7400,7 @@ define <4 x i64> @test_mm256_maskz_rorv_epi32(i8 zeroext %__U, <4 x i64> %__A, <
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1)
+ %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1)
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
%5 = bitcast <8 x i32> %4 to <4 x i64>
@@ -7429,7 +7413,7 @@ define <2 x i64> @test_mm_rorv_epi64(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B)
+ %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B)
ret <2 x i64> %0
}
@@ -7447,7 +7431,7 @@ define <2 x i64> @test_mm_mask_rorv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x
; X64-NEXT: vprorvq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B)
+ %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W
@@ -7468,7 +7452,7 @@ define <2 x i64> @test_mm_maskz_rorv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x
; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B)
+ %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
@@ -7481,7 +7465,7 @@ define <4 x i64> @test_mm256_rorv_epi64(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B)
+ %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B)
ret <4 x i64> %0
}
@@ -7499,7 +7483,7 @@ define <4 x i64> @test_mm256_mask_rorv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4
; X64-NEXT: vprorvq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B)
+ %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W
@@ -7520,7 +7504,7 @@ define <4 x i64> @test_mm256_maskz_rorv_epi64(i8 zeroext %__U, <4 x i64> %__A, <
; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B)
+ %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B)
%1 = bitcast i8 %__U to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
@@ -7572,13 +7556,13 @@ declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)
-declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>)
-declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>)
-declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>)
-declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>)
-declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>)
-declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>)
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
+declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
!0 = !{i32 1}
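
The variable-count forms follow the same identity with a runtime shift vector, as in the prolv/prorv hunks above. A minimal hedged sketch of a per-lane rotate right (the name @rorv64 is ours, not from the test file):

; fshr(a, b, n) shifts the double-width value a:b right by n and keeps the
; low half, so equal data operands give a rotate right by n (mod 64 here).
define <2 x i64> @rorv64(<2 x i64> %x, <2 x i64> %n) {
  %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %n)
  ret <2 x i64> %r
}
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

With AVX512VL enabled this is expected to lower to vprorvq, matching the unchanged CHECK lines in the tests above.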