diff options
Diffstat (limited to 'test/CodeGen/X86/avx2-vbroadcast.ll')
-rw-r--r-- | test/CodeGen/X86/avx2-vbroadcast.ll | 441 |
1 files changed, 355 insertions, 86 deletions
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index 94dcdcabdd33..6b77edb155a4 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -1,7 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s -; CHECK: vpbroadcastb (% define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: BB16: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0 +; CHECK-NEXT: retq entry: %q = load i8, i8* %ptr, align 4 %q0 = insertelement <16 x i8> undef, i8 %q, i32 0 @@ -22,8 +26,12 @@ entry: %qf = insertelement <16 x i8> %qe, i8 %q, i32 15 ret <16 x i8> %qf } -; CHECK: vpbroadcastb (% + define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: BB32: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load i8, i8* %ptr, align 4 %q0 = insertelement <32 x i8> undef, i8 %q, i32 0 @@ -61,9 +69,12 @@ entry: %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31 ret <32 x i8> %q2f } -; CHECK: vpbroadcastw (% define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: W16: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0 +; CHECK-NEXT: retq entry: %q = load i16, i16* %ptr, align 4 %q0 = insertelement <8 x i16> undef, i16 %q, i32 0 @@ -76,8 +87,12 @@ entry: %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7 ret <8 x i16> %q7 } -; CHECK: vpbroadcastw (% + define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: WW16: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load i16, i16* %ptr, align 4 %q0 = insertelement <16 x i16> undef, i16 %q, i32 0 @@ -98,8 +113,12 @@ entry: %qf = insertelement <16 x i16> %qe, i16 %q, i32 15 ret <16 x i16> %qf } -; CHECK: vbroadcastss (% + define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: D32: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 +; CHECK-NEXT: retq entry: %q = load i32, i32* %ptr, align 4 %q0 = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -108,8 +127,12 @@ entry: %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3 ret <4 x i32> %q3 } -; CHECK: vbroadcastss (% + define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: DD32: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load i32, i32* %ptr, align 4 %q0 = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -122,16 +145,24 @@ entry: %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7 ret <8 x i32> %q7 } -; CHECK: vpbroadcastq (% + define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: Q64: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 +; CHECK-NEXT: retq entry: %q = load i64, i64* %ptr, align 4 %q0 = insertelement <2 x i64> undef, i64 %q, i32 0 %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1 ret <2 x i64> %q1 } -; CHECK: vbroadcastsd (% + define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: QQ64: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 +; CHECK-NEXT: retq entry: %q = load i64, i64* %ptr, align 4 %q0 = insertelement <4 x i64> undef, i64 %q, i32 0 @@ -141,9 +172,214 @@ entry: ret <4 x i64> %q3 } +; FIXME: Pointer adjusted broadcasts + +define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastb 1(%rdi), %xmm0 +; CHECK-NEXT: retq +entry: + %ld = load <16 x i8>, <16 x i8>* %ptr + %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <16 x i8> %ret +} + +define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <16 x i8>, <16 x i8>* %ptr + %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <32 x i8> %ret +} + +define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <32 x i8>, <32 x i8>* %ptr + %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <32 x i8> %ret +} + +define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i16_8i16_11111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x i16>, <8 x i16>* %ptr + %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <8 x i16> %ret +} + +define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x i16>, <8 x i16>* %ptr + %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <16 x i16> %ret +} + +define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <16 x i16>, <16 x i16>* %ptr + %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <16 x i16> %ret +} + +define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i32_4i32_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x i32>, <4 x i32>* %ptr + %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %ret +} + +define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i32_4i32_33333333: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x i32>, <4 x i32>* %ptr + %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + ret <8 x i32> %ret +} + +define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8i32_8i32_55555555: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x i32>, <8 x i32>* %ptr + %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <8 x i32> %ret +} + +define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f32_4f32_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x float>, <4 x float>* %ptr + %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x float> %ret +} + +define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8f32_4f32_33333333: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x float>, <4 x float>* %ptr + %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + ret <8 x float> %ret +} + +define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_8f32_8f32_55555555: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <8 x float>, <8 x float>* %ptr + %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <8 x float> %ret +} + +define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_2i64_2i64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm0 +; CHECK-NEXT: retq +entry: + %ld = load <2 x i64>, <2 x i64>* %ptr + %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1> + ret <2 x i64> %ret +} + +define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i64_2i64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <2 x i64>, <2 x i64>* %ptr + %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x i64> %ret +} + +define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4i64_4i64_2222: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x i64>, <4 x i64>* %ptr + %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + ret <4 x i64> %ret +} + +define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_2f64_2f64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: retq +entry: + %ld = load <2 x double>, <2 x double>* %ptr + %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1> + ret <2 x double> %ret +} + +define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f64_2f64_1111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <2 x double>, <2 x double>* %ptr + %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x double> %ret +} + +define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: load_splat_4f64_4f64_2222: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0 +; CHECK-NEXT: retq +entry: + %ld = load <4 x double>, <4 x double>* %ptr + %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + ret <4 x double> %ret +} + ; make sure that we still don't support broadcast double into 128-bit vector ; this used to crash define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: I: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; CHECK-NEXT: retq entry: %q = load double, double* %ptr, align 4 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0 @@ -151,28 +387,33 @@ entry: ret <2 x double> %vecinit2.i } -; CHECK: V111 -; CHECK: vpbroadcastd -; CHECK: ret define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp { +; CHECK-LABEL: V111: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq entry: %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ret <8 x i32> %g } -; CHECK: V113 -; CHECK: vbroadcastss -; CHECK: ret define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp { +; CHECK-LABEL: V113: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq entry: %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000> ret <8 x float> %g } -; CHECK: _e2 -; CHECK: vbroadcastss -; CHECK: ret define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: _e2: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0 %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2 @@ -180,10 +421,11 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp { ret <4 x float> %vecinit6.i } -; CHECK: _e4 -; CHECK-NOT: broadcast -; CHECK: ret define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp { +; CHECK-LABEL: _e4: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52] +; CHECK-NEXT: retq %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0 %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2 @@ -197,6 +439,17 @@ define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp { define void @crash() nounwind alwaysinline { +; CHECK-LABEL: crash: +; CHECK: ## BB#0: ## %WGLoopsEntry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB31_1 +; CHECK-NEXT: ## BB#2: ## %ret +; CHECK-NEXT: retq +; CHECK-NEXT: .align 4, 0x90 +; CHECK-NEXT: LBB31_1: ## %footer349VF +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB31_1 WGLoopsEntry: br i1 undef, label %ret, label %footer329VF @@ -223,135 +476,151 @@ ret: ret void } -; CHECK: _inreg0 -; CHECK: broadcastss -; CHECK: ret define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp { +; CHECK-LABEL: _inreg0: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovd %edi, %xmm0 +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: retq %in = insertelement <8 x i32> undef, i32 %scalar, i32 0 %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer ret <8 x i32> %wide } -; CHECK: _inreg1 -; CHECK: broadcastss -; CHECK: ret define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp { +; CHECK-LABEL: _inreg1: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: retq %in = insertelement <8 x float> undef, float %scalar, i32 0 %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer ret <8 x float> %wide } -; CHECK: _inreg2 -; CHECK: broadcastss -; CHECK: ret define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp { +; CHECK-LABEL: _inreg2: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 +; CHECK-NEXT: retq %in = insertelement <4 x float> undef, float %scalar, i32 0 %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %wide } -; CHECK: _inreg3 -; CHECK: broadcastsd -; CHECK: ret define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp { +; CHECK-LABEL: _inreg3: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: retq %in = insertelement <4 x double> undef, double %scalar, i32 0 %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %wide } -;CHECK-LABEL: _inreg8xfloat: -;CHECK: vbroadcastss -;CHECK: ret define <8 x float> @_inreg8xfloat(<8 x float> %a) { +; CHECK-LABEL: _inreg8xfloat: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: retq %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer ret <8 x float> %b } -;CHECK-LABEL: _inreg4xfloat: -;CHECK: vbroadcastss -;CHECK: ret define <4 x float> @_inreg4xfloat(<4 x float> %a) { +; CHECK-LABEL: _inreg4xfloat: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 +; CHECK-NEXT: retq %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %b } -;CHECK-LABEL: _inreg16xi16: -;CHECK: vpbroadcastw -;CHECK: ret define <16 x i16> @_inreg16xi16(<16 x i16> %a) { +; CHECK-LABEL: _inreg16xi16: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 +; CHECK-NEXT: retq %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer ret <16 x i16> %b } -;CHECK-LABEL: _inreg8xi16: -;CHECK: vpbroadcastw -;CHECK: ret define <8 x i16> @_inreg8xi16(<8 x i16> %a) { +; CHECK-LABEL: _inreg8xi16: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 +; CHECK-NEXT: retq %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %b } - -;CHECK-LABEL: _inreg4xi64: -;CHECK: vbroadcastsd -;CHECK: ret define <4 x i64> @_inreg4xi64(<4 x i64> %a) { +; CHECK-LABEL: _inreg4xi64: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: retq %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer ret <4 x i64> %b } -;CHECK-LABEL: _inreg2xi64: -;CHECK: vpbroadcastq -;CHECK: ret define <2 x i64> @_inreg2xi64(<2 x i64> %a) { +; CHECK-LABEL: _inreg2xi64: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 +; CHECK-NEXT: retq %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %b } -;CHECK-LABEL: _inreg4xdouble: -;CHECK: vbroadcastsd -;CHECK: ret define <4 x double> @_inreg4xdouble(<4 x double> %a) { +; CHECK-LABEL: _inreg4xdouble: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: retq %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %b } -;CHECK-LABEL: _inreg2xdouble: -;CHECK: vmovddup -;CHECK: ret define <2 x double> @_inreg2xdouble(<2 x double> %a) { +; CHECK-LABEL: _inreg2xdouble: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; CHECK-NEXT: retq %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer ret <2 x double> %b } -;CHECK-LABEL: _inreg8xi32: -;CHECK: vbroadcastss -;CHECK: ret define <8 x i32> @_inreg8xi32(<8 x i32> %a) { +; CHECK-LABEL: _inreg8xi32: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: retq %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer ret <8 x i32> %b } -;CHECK-LABEL: _inreg4xi32: -;CHECK: vbroadcastss -;CHECK: ret define <4 x i32> @_inreg4xi32(<4 x i32> %a) { +; CHECK-LABEL: _inreg4xi32: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 +; CHECK-NEXT: retq %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %b } -;CHECK-LABEL: _inreg32xi8: -;CHECK: vpbroadcastb -;CHECK: ret define <32 x i8> @_inreg32xi8(<32 x i8> %a) { +; CHECK-LABEL: _inreg32xi8: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 +; CHECK-NEXT: retq %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer ret <32 x i8> %b } -;CHECK-LABEL: _inreg16xi8: -;CHECK: vpbroadcastb -;CHECK: ret define <16 x i8> @_inreg16xi8(<16 x i8> %a) { +; CHECK-LABEL: _inreg16xi8: +; CHECK: ## BB#0: +; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 +; CHECK-NEXT: retq %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %b } @@ -360,11 +629,11 @@ define <16 x i8> @_inreg16xi8(<16 x i8> %a) { ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs ; (via the insertelements). -; CHECK-LABEL: splat_concat1 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastss -; CHECK-NEXT: ret define <8 x float> @splat_concat1(float %f) { +; CHECK-LABEL: splat_concat1: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: retq %1 = insertelement <4 x float> undef, float %f, i32 0 %2 = insertelement <4 x float> %1, float %f, i32 1 %3 = insertelement <4 x float> %2, float %f, i32 2 @@ -373,11 +642,11 @@ define <8 x float> @splat_concat1(float %f) { ret <8 x float> %5 } -; CHECK-LABEL: splat_concat2 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastss -; CHECK-NEXT: ret define <8 x float> @splat_concat2(float %f) { +; CHECK-LABEL: splat_concat2: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: retq %1 = insertelement <4 x float> undef, float %f, i32 0 %2 = insertelement <4 x float> %1, float %f, i32 1 %3 = insertelement <4 x float> %2, float %f, i32 2 @@ -390,22 +659,22 @@ define <8 x float> @splat_concat2(float %f) { ret <8 x float> %9 } -; CHECK-LABEL: splat_concat3 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastsd -; CHECK-NEXT: ret define <4 x double> @splat_concat3(double %d) { +; CHECK-LABEL: splat_concat3: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: retq %1 = insertelement <2 x double> undef, double %d, i32 0 %2 = insertelement <2 x double> %1, double %d, i32 1 %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ret <4 x double> %3 } -; CHECK-LABEL: splat_concat4 -; CHECK-NOT: vinsertf128 -; CHECK: vbroadcastsd -; CHECK-NEXT: ret define <4 x double> @splat_concat4(double %d) { +; CHECK-LABEL: splat_concat4: +; CHECK: ## BB#0: +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: retq %1 = insertelement <2 x double> undef, double %d, i32 0 %2 = insertelement <2 x double> %1, double %d, i32 1 %3 = insertelement <2 x double> undef, double %d, i32 0 |