diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 |
commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll | |
parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
download | src-d8e91e46262bc44006913e6796843909f1ac7bcd.tar.gz src-d8e91e46262bc44006913e6796843909f1ac7bcd.zip |
Vendor import of llvm trunk r351319 (just before the release_80 branch point) vendor/llvm/llvm-trunk-r351319
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=343171
svn path=/vendor/llvm/llvm-trunk-r351319/; revision=343172; tag=vendor/llvm/llvm-trunk-r351319
Diffstat (limited to 'test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll')
-rw-r--r-- | test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll | 128 |
1 file changed, 56 insertions, 72 deletions
diff --git a/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll index 6c1cfa8fc093..8b513f0a29cd 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -6626,13 +6626,11 @@ define <2 x i64> @test_mm_rol_epi32(<2 x i64> %__A) { ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <4 x i32> %1 to <2 x i64> ret <2 x i64> %2 } -declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32) - define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_rol_epi32: ; X86: # %bb.0: # %entry @@ -6648,7 +6646,7 @@ define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <2 x i64> %__W to <4 x i32> %3 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -6672,7 +6670,7 @@ define <2 x i64> @test_mm_maskz_rol_epi32(i8 zeroext %__U, <2 x i64> %__A) { ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>) %2 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer @@ -6687,13 +6685,11 @@ define <4 x i64> 
@test_mm256_rol_epi32(<4 x i64> %__A) { ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <8 x i32> %1 to <4 x i64> ret <4 x i64> %2 } -declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32) - define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_rol_epi32: ; X86: # %bb.0: # %entry @@ -6709,7 +6705,7 @@ define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <4 x i64> %__W to <8 x i32> %3 = bitcast i8 %__U to <8 x i1> %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 @@ -6732,7 +6728,7 @@ define <4 x i64> @test_mm256_maskz_rol_epi32(i8 zeroext %__U, <4 x i64> %__A) { ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>) %2 = bitcast i8 %__U to <8 x i1> %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer %4 = bitcast <8 x i32> %3 to <4 x i64> @@ -6745,12 +6741,10 @@ define <2 x i64> @test_mm_rol_epi64(<2 x i64> %__A) { ; CHECK-NEXT: vprolq $5, %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>) ret <2 x i64> %0 } 
-declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32) - define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_rol_epi64: ; X86: # %bb.0: # %entry @@ -6765,7 +6759,7 @@ define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i ; X64-NEXT: vprolq $5, %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W @@ -6786,7 +6780,7 @@ define <2 x i64> @test_mm_maskz_rol_epi64(i8 zeroext %__U, <2 x i64> %__A) { ; X64-NEXT: vprolq $5, %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer @@ -6799,12 +6793,10 @@ define <4 x i64> @test_mm256_rol_epi64(<4 x i64> %__A) { ; CHECK-NEXT: vprolq $5, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5,i64 5, i64 5>) ret <4 x i64> %0 } -declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32) - define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_rol_epi64: ; X86: # %bb.0: # %entry @@ -6819,7 +6811,7 @@ define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 ; X64-NEXT: vprolq $5, %ymm1, %ymm0 
{%k1} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5,i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W @@ -6840,7 +6832,7 @@ define <4 x i64> @test_mm256_maskz_rol_epi64(i8 zeroext %__U, <4 x i64> %__A) { ; X64-NEXT: vprolq $5, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5,i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer @@ -6855,7 +6847,7 @@ define <2 x i64> @test_mm_rolv_epi32(<2 x i64> %__A, <2 x i64> %__B) { entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast <4 x i32> %2 to <2 x i64> ret <2 x i64> %3 } @@ -6876,7 +6868,7 @@ define <2 x i64> @test_mm_mask_rolv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast <2 x i64> %__W to <4 x i32> %4 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -6901,7 +6893,7 @@ define <2 x i64> @test_mm_maskz_rolv_epi32(i8 zeroext %__U, <2 x 
i64> %__A, <2 x entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer @@ -6917,7 +6909,7 @@ define <4 x i64> @test_mm256_rolv_epi32(<4 x i64> %__A, <4 x i64> %__B) { entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast <8 x i32> %2 to <4 x i64> ret <4 x i64> %3 } @@ -6938,7 +6930,7 @@ define <4 x i64> @test_mm256_mask_rolv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast <4 x i64> %__W to <8 x i32> %4 = bitcast i8 %__U to <8 x i1> %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 @@ -6962,7 +6954,7 @@ define <4 x i64> @test_mm256_maskz_rolv_epi32(i8 zeroext %__U, <4 x i64> %__A, < entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast i8 %__U to <8 x i1> %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer %5 = bitcast <8 x i32> %4 to <4 x i64> @@ -6975,7 +6967,7 @@ define <2 x i64> @test_mm_rolv_epi64(<2 x i64> %__A, <2 x i64> %__B) { ; CHECK-NEXT: vprolvq %xmm1, 
%xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) ret <2 x i64> %0 } @@ -6993,7 +6985,7 @@ define <2 x i64> @test_mm_mask_rolv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x ; X64-NEXT: vprolvq %xmm2, %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W @@ -7014,7 +7006,7 @@ define <2 x i64> @test_mm_maskz_rolv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer @@ -7027,7 +7019,7 @@ define <4 x i64> @test_mm256_rolv_epi64(<4 x i64> %__A, <4 x i64> %__B) { ; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) ret <4 x i64> %0 } @@ -7045,7 +7037,7 @@ define <4 x i64> @test_mm256_mask_rolv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 ; X64-NEXT: vprolvq %ymm2, %ymm1, %ymm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x 
i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W @@ -7066,7 +7058,7 @@ define <4 x i64> @test_mm256_maskz_rolv_epi64(i8 zeroext %__U, <4 x i64> %__A, < ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer @@ -7080,13 +7072,11 @@ define <2 x i64> @test_mm_ror_epi32(<2 x i64> %__A) { ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <4 x i32> %1 to <2 x i64> ret <2 x i64> %2 } -declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32) - define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_ror_epi32: ; X86: # %bb.0: # %entry @@ -7102,7 +7092,7 @@ define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <2 x i64> %__W to <4 x i32> %3 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -7126,7 +7116,7 @@ define <2 x i64> @test_mm_maskz_ror_epi32(i8 
zeroext %__U, <2 x i64> %__A) { ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %0, i32 5) + %1 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>) %2 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer @@ -7141,13 +7131,11 @@ define <4 x i64> @test_mm256_ror_epi32(<4 x i64> %__A) { ; CHECK-NEXT: ret{{[l|q]}} entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <8 x i32> %1 to <4 x i64> ret <4 x i64> %2 } -declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32) - define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_ror_epi32: ; X86: # %bb.0: # %entry @@ -7163,7 +7151,7 @@ define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>) %2 = bitcast <4 x i64> %__W to <8 x i32> %3 = bitcast i8 %__U to <8 x i1> %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 @@ -7186,7 +7174,7 @@ define <4 x i64> @test_mm256_maskz_ror_epi32(i8 zeroext %__U, <4 x i64> %__A) { ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %0, i32 5) + %1 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> <i32 5, i32 5, i32 5, i32 5, 
i32 5, i32 5, i32 5, i32 5>) %2 = bitcast i8 %__U to <8 x i1> %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer %4 = bitcast <8 x i32> %3 to <4 x i64> @@ -7199,12 +7187,10 @@ define <2 x i64> @test_mm_ror_epi64(<2 x i64> %__A) { ; CHECK-NEXT: vprorq $5, %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>) ret <2 x i64> %0 } -declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32) - define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_ror_epi64: ; X86: # %bb.0: # %entry @@ -7219,7 +7205,7 @@ define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i ; X64-NEXT: vprorq $5, %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__W @@ -7240,7 +7226,7 @@ define <2 x i64> @test_mm_maskz_ror_epi64(i8 zeroext %__U, <2 x i64> %__A) { ; X64-NEXT: vprorq $5, %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %__A, i32 5) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> <i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer @@ -7253,12 +7239,10 @@ define <4 x i64> @test_mm256_ror_epi64(<4 x i64> %__A) { ; CHECK-NEXT: vprorq $5, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x 
i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>) ret <4 x i64> %0 } -declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32) - define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_ror_epi64: ; X86: # %bb.0: # %entry @@ -7273,7 +7257,7 @@ define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 ; X64-NEXT: vprorq $5, %ymm1, %ymm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__W @@ -7294,7 +7278,7 @@ define <4 x i64> @test_mm256_maskz_ror_epi64(i8 zeroext %__U, <4 x i64> %__A) { ; X64-NEXT: vprorq $5, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %__A, i32 5) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> <i64 5, i64 5, i64 5, i64 5>) %1 = bitcast i8 %__U to <8 x i1> %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer @@ -7309,7 +7293,7 @@ define <2 x i64> @test_mm_rorv_epi32(<2 x i64> %__A, <2 x i64> %__B) { entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast <4 x i32> %2 to <2 x i64> ret <2 x i64> %3 } @@ -7330,7 +7314,7 @@ define <2 x i64> @test_mm_mask_rorv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 
x entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast <2 x i64> %__W to <4 x i32> %4 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -7355,7 +7339,7 @@ define <2 x i64> @test_mm_maskz_rorv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x entry: %0 = bitcast <2 x i64> %__A to <4 x i32> %1 = bitcast <2 x i64> %__B to <4 x i32> - %2 = tail call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %0, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> %1) %3 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %4 = select <4 x i1> %extract.i, <4 x i32> %2, <4 x i32> zeroinitializer @@ -7371,7 +7355,7 @@ define <4 x i64> @test_mm256_rorv_epi32(<4 x i64> %__A, <4 x i64> %__B) { entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast <8 x i32> %2 to <4 x i64> ret <4 x i64> %3 } @@ -7392,7 +7376,7 @@ define <4 x i64> @test_mm256_mask_rorv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast <4 x i64> %__W to <8 x i32> %4 = bitcast i8 %__U to <8 x i1> %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3 @@ -7416,7 +7400,7 @@ define <4 x i64> @test_mm256_maskz_rorv_epi32(i8 zeroext %__U, <4 x i64> %__A, < 
entry: %0 = bitcast <4 x i64> %__A to <8 x i32> %1 = bitcast <4 x i64> %__B to <8 x i32> - %2 = tail call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %0, <8 x i32> %1) + %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) %3 = bitcast i8 %__U to <8 x i1> %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer %5 = bitcast <8 x i32> %4 to <4 x i64> @@ -7429,7 +7413,7 @@ define <2 x i64> @test_mm_rorv_epi64(<2 x i64> %__A, <2 x i64> %__B) { ; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) ret <2 x i64> %0 } @@ -7447,7 +7431,7 @@ define <2 x i64> @test_mm_mask_rorv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x ; X64-NEXT: vprorvq %xmm2, %xmm1, %xmm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__W @@ -7468,7 +7452,7 @@ define <2 x i64> @test_mm_maskz_rorv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %__A, <2 x i64> %__B) + %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__A, <2 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer @@ -7481,7 +7465,7 @@ define <4 x i64> @test_mm256_rorv_epi64(<4 x i64> %__A, <4 x i64> %__B) { ; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: 
ret{{[l|q]}} entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) ret <4 x i64> %0 } @@ -7499,7 +7483,7 @@ define <4 x i64> @test_mm256_mask_rorv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 ; X64-NEXT: vprorvq %ymm2, %ymm1, %ymm0 {%k1} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__W @@ -7520,7 +7504,7 @@ define <4 x i64> @test_mm256_maskz_rorv_epi64(i8 zeroext %__U, <4 x i64> %__A, < ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq entry: - %0 = tail call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %__A, <4 x i64> %__B) + %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__A, <4 x i64> %__B) %1 = bitcast i8 %__U to <8 x i1> %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer @@ -7572,13 +7556,13 @@ declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>) declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>) declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>) declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>) -declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>) -declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>) -declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>) -declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>) -declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>) 
-declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>) -declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>) -declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) +declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) +declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) !0 = !{i32 1} |