about | summary | refs | log | tree | commit | diff
path: root/test/CodeGen/X86/avx2-vbroadcast.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/avx2-vbroadcast.ll')
-rw-r--r-- test/CodeGen/X86/avx2-vbroadcast.ll | 441
1 files changed, 355 insertions, 86 deletions
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 94dcdcabdd33..6b77edb155a4 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -1,7 +1,11 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
-; CHECK: vpbroadcastb (%
define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: BB16:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i8, i8* %ptr, align 4
%q0 = insertelement <16 x i8> undef, i8 %q, i32 0
@@ -22,8 +26,12 @@ entry:
%qf = insertelement <16 x i8> %qe, i8 %q, i32 15
ret <16 x i8> %qf
}
-; CHECK: vpbroadcastb (%
+
define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: BB32:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i8, i8* %ptr, align 4
%q0 = insertelement <32 x i8> undef, i8 %q, i32 0
@@ -61,9 +69,12 @@ entry:
%q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
ret <32 x i8> %q2f
}
-; CHECK: vpbroadcastw (%
define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: W16:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i16, i16* %ptr, align 4
%q0 = insertelement <8 x i16> undef, i16 %q, i32 0
@@ -76,8 +87,12 @@ entry:
%q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
ret <8 x i16> %q7
}
-; CHECK: vpbroadcastw (%
+
define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: WW16:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i16, i16* %ptr, align 4
%q0 = insertelement <16 x i16> undef, i16 %q, i32 0
@@ -98,8 +113,12 @@ entry:
%qf = insertelement <16 x i16> %qe, i16 %q, i32 15
ret <16 x i16> %qf
}
-; CHECK: vbroadcastss (%
+
define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: D32:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%q0 = insertelement <4 x i32> undef, i32 %q, i32 0
@@ -108,8 +127,12 @@ entry:
%q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
ret <4 x i32> %q3
}
-; CHECK: vbroadcastss (%
+
define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: DD32:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%q0 = insertelement <8 x i32> undef, i32 %q, i32 0
@@ -122,16 +145,24 @@ entry:
%q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
ret <8 x i32> %q7
}
-; CHECK: vpbroadcastq (%
+
define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: Q64:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 4
%q0 = insertelement <2 x i64> undef, i64 %q, i32 0
%q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
ret <2 x i64> %q1
}
-; CHECK: vbroadcastsd (%
+
define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: QQ64:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 4
%q0 = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -141,9 +172,214 @@ entry:
ret <4 x i64> %q3
}
+; FIXME: Pointer adjusted broadcasts
+
+define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i8>, <16 x i8>* %ptr
+ %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i8>, <16 x i8>* %ptr
+ %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <32 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <32 x i8>, <32 x i8>* %ptr
+ %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <32 x i8> %ret
+}
+
+define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i16_8i16_11111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i16>, <8 x i16>* %ptr
+ %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i16>, <8 x i16>* %ptr
+ %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i16>, <16 x i16>* %ptr
+ %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i16> %ret
+}
+
+define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i32_4i32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_4i32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_8i32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i32>, <8 x i32>* %ptr
+ %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i32> %ret
+}
+
+define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f32_4f32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_4f32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_8f32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x float>, <8 x float>* %ptr
+ %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x float> %ret
+}
+
+define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_4i64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i64>, <4 x i64>* %ptr
+ %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i64> %ret
+}
+
+define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_4f64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x double>, <4 x double>* %ptr
+ %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x double> %ret
+}
+
; make sure that we still don't support broadcast double into 128-bit vector
; this used to crash
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: I:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; CHECK-NEXT: retq
entry:
%q = load double, double* %ptr, align 4
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
@@ -151,28 +387,33 @@ entry:
ret <2 x double> %vecinit2.i
}
-; CHECK: V111
-; CHECK: vpbroadcastd
-; CHECK: ret
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
+; CHECK-LABEL: V111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %g
}
-; CHECK: V113
-; CHECK: vbroadcastss
-; CHECK: ret
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
+; CHECK-LABEL: V113:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
ret <8 x float> %g
}
-; CHECK: _e2
-; CHECK: vbroadcastss
-; CHECK: ret
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _e2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
@@ -180,10 +421,11 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
ret <4 x float> %vecinit6.i
}
-; CHECK: _e4
-; CHECK-NOT: broadcast
-; CHECK: ret
define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _e4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
+; CHECK-NEXT: retq
%vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
%vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
%vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
@@ -197,6 +439,17 @@ define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
define void @crash() nounwind alwaysinline {
+; CHECK-LABEL: crash:
+; CHECK: ## BB#0: ## %WGLoopsEntry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je LBB31_1
+; CHECK-NEXT: ## BB#2: ## %ret
+; CHECK-NEXT: retq
+; CHECK-NEXT: .align 4, 0x90
+; CHECK-NEXT: LBB31_1: ## %footer349VF
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp LBB31_1
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF
@@ -223,135 +476,151 @@ ret:
ret void
}
-; CHECK: _inreg0
-; CHECK: broadcastss
-; CHECK: ret
define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg0:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovd %edi, %xmm0
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%in = insertelement <8 x i32> undef, i32 %scalar, i32 0
%wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
ret <8 x i32> %wide
}
-; CHECK: _inreg1
-; CHECK: broadcastss
-; CHECK: ret
define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%in = insertelement <8 x float> undef, float %scalar, i32 0
%wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %wide
}
-; CHECK: _inreg2
-; CHECK: broadcastss
-; CHECK: ret
define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: retq
%in = insertelement <4 x float> undef, float %scalar, i32 0
%wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %wide
}
-; CHECK: _inreg3
-; CHECK: broadcastsd
-; CHECK: ret
define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%in = insertelement <4 x double> undef, double %scalar, i32 0
%wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %wide
}
-;CHECK-LABEL: _inreg8xfloat:
-;CHECK: vbroadcastss
-;CHECK: ret
define <8 x float> @_inreg8xfloat(<8 x float> %a) {
+; CHECK-LABEL: _inreg8xfloat:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %b
}
-;CHECK-LABEL: _inreg4xfloat:
-;CHECK: vbroadcastss
-;CHECK: ret
define <4 x float> @_inreg4xfloat(<4 x float> %a) {
+; CHECK-LABEL: _inreg4xfloat:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %b
}
-;CHECK-LABEL: _inreg16xi16:
-;CHECK: vpbroadcastw
-;CHECK: ret
define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
+; CHECK-LABEL: _inreg16xi16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
ret <16 x i16> %b
}
-;CHECK-LABEL: _inreg8xi16:
-;CHECK: vpbroadcastw
-;CHECK: ret
define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
+; CHECK-LABEL: _inreg8xi16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %b
}
-
-;CHECK-LABEL: _inreg4xi64:
-;CHECK: vbroadcastsd
-;CHECK: ret
define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
+; CHECK-LABEL: _inreg4xi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
ret <4 x i64> %b
}
-;CHECK-LABEL: _inreg2xi64:
-;CHECK: vpbroadcastq
-;CHECK: ret
define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
+; CHECK-LABEL: _inreg2xi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %b
}
-;CHECK-LABEL: _inreg4xdouble:
-;CHECK: vbroadcastsd
-;CHECK: ret
define <4 x double> @_inreg4xdouble(<4 x double> %a) {
+; CHECK-LABEL: _inreg4xdouble:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %b
}
-;CHECK-LABEL: _inreg2xdouble:
-;CHECK: vmovddup
-;CHECK: ret
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
+; CHECK-LABEL: _inreg2xdouble:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: retq
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %b
}
-;CHECK-LABEL: _inreg8xi32:
-;CHECK: vbroadcastss
-;CHECK: ret
define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
+; CHECK-LABEL: _inreg8xi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
ret <8 x i32> %b
}
-;CHECK-LABEL: _inreg4xi32:
-;CHECK: vbroadcastss
-;CHECK: ret
define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
+; CHECK-LABEL: _inreg4xi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %b
}
-;CHECK-LABEL: _inreg32xi8:
-;CHECK: vpbroadcastb
-;CHECK: ret
define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
+; CHECK-LABEL: _inreg32xi8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
ret <32 x i8> %b
}
-;CHECK-LABEL: _inreg16xi8:
-;CHECK: vpbroadcastb
-;CHECK: ret
define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
+; CHECK-LABEL: _inreg16xi8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %b
}
@@ -360,11 +629,11 @@ define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
-; CHECK-LABEL: splat_concat1
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastss
-; CHECK-NEXT: ret
define <8 x float> @splat_concat1(float %f) {
+; CHECK-LABEL: splat_concat1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <4 x float> undef, float %f, i32 0
%2 = insertelement <4 x float> %1, float %f, i32 1
%3 = insertelement <4 x float> %2, float %f, i32 2
@@ -373,11 +642,11 @@ define <8 x float> @splat_concat1(float %f) {
ret <8 x float> %5
}
-; CHECK-LABEL: splat_concat2
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastss
-; CHECK-NEXT: ret
define <8 x float> @splat_concat2(float %f) {
+; CHECK-LABEL: splat_concat2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <4 x float> undef, float %f, i32 0
%2 = insertelement <4 x float> %1, float %f, i32 1
%3 = insertelement <4 x float> %2, float %f, i32 2
@@ -390,22 +659,22 @@ define <8 x float> @splat_concat2(float %f) {
ret <8 x float> %9
}
-; CHECK-LABEL: splat_concat3
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastsd
-; CHECK-NEXT: ret
define <4 x double> @splat_concat3(double %d) {
+; CHECK-LABEL: splat_concat3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <2 x double> undef, double %d, i32 0
%2 = insertelement <2 x double> %1, double %d, i32 1
%3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x double> %3
}
-; CHECK-LABEL: splat_concat4
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastsd
-; CHECK-NEXT: ret
define <4 x double> @splat_concat4(double %d) {
+; CHECK-LABEL: splat_concat4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <2 x double> undef, double %d, i32 0
%2 = insertelement <2 x double> %1, double %d, i32 1
%3 = insertelement <2 x double> undef, double %d, i32 0