diff options
Diffstat (limited to 'test/CodeGen/X86/sse-intrinsics-x86.ll')
-rw-r--r-- | test/CodeGen/X86/sse-intrinsics-x86.ll | 449 |
1 files changed, 379 insertions, 70 deletions
diff --git a/test/CodeGen/X86/sse-intrinsics-x86.ll b/test/CodeGen/X86/sse-intrinsics-x86.ll index 0857189be734..1df432185701 100644 --- a/test/CodeGen/X86/sse-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -1,8 +1,17 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: addss +; SSE-LABEL: test_x86_sse_add_ss: +; SSE: ## BB#0: +; SSE-NEXT: addss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_add_ss: +; KNL: ## BB#0: +; KNL-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -10,7 +19,15 @@ declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: cmpordps +; SSE-LABEL: test_x86_sse_cmp_ps: +; SSE: ## BB#0: +; SSE-NEXT: cmpordps %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cmp_ps: +; KNL: ## BB#0: +; KNL-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -18,7 +35,15 @@ declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: cmpordss +; SSE-LABEL: test_x86_sse_cmp_ss: +; SSE: ## BB#0: +; SSE-NEXT: cmpordss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cmp_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -26,9 +51,23 @@ declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: sete - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comieq_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: setnp %al +; SSE-NEXT: sete %cl +; SSE-NEXT: andb %al, %cl +; SSE-NEXT: movzbl %cl, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comieq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: setnp %al +; KNL-NEXT: sete %cl +; KNL-NEXT: andb %al, %cl +; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -36,9 +75,19 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: setae - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comige_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: setae %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comige_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: setae %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -46,9 +95,19 @@ declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: seta - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comigt_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: seta %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comigt_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: seta %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -56,9 +115,19 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: setbe - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comile_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: comiss %xmm0, %xmm1 +; SSE-NEXT: setae %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comile_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vcomiss %xmm0, %xmm1 +; KNL-NEXT: setae %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -66,8 +135,19 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: sbb +; SSE-LABEL: test_x86_sse_comilt_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: comiss %xmm0, %xmm1 +; SSE-NEXT: seta %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comilt_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vcomiss %xmm0, %xmm1 +; KNL-NEXT: seta %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -75,9 +155,23 @@ declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: setne - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comineq_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: setp %al +; SSE-NEXT: setne %cl +; SSE-NEXT: orb %al, %cl +; SSE-NEXT: movzbl %cl, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comineq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: setp %al +; KNL-NEXT: setne %cl +; KNL-NEXT: orb %al, %cl +; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -85,8 +179,17 @@ declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { - ; CHECK: movl - ; CHECK: cvtsi2ss +; SSE-LABEL: test_x86_sse_cvtsi2ss: +; SSE: ## BB#0: +; SSE-NEXT: movl $7, %eax +; SSE-NEXT: cvtsi2ssl %eax, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cvtsi2ss: +; KNL: ## BB#0: +; KNL-NEXT: movl $7, %eax +; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -94,7 +197,15 @@ declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { - ; CHECK: cvtss2si +; SSE-LABEL: test_x86_sse_cvtss2si: +; SSE: ## BB#0: +; SSE-NEXT: cvtss2si %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cvtss2si: +; KNL: ## BB#0: +; KNL-NEXT: vcvtss2si %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] ret i32 %res } @@ -102,7 +213,15 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { - ; CHECK: cvttss2si +; SSE-LABEL: test_x86_sse_cvttss2si: +; SSE: ## BB#0: +; SSE-NEXT: cvttss2si %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cvttss2si: +; KNL: ## BB#0: +; KNL-NEXT: vcvttss2si %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] ret i32 %res } @@ -110,7 +229,15 @@ declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: divss +; SSE-LABEL: test_x86_sse_div_ss: +; SSE: ## BB#0: +; SSE-NEXT: divss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_div_ss: +; KNL: ## BB#0: +; KNL-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -118,8 +245,17 @@ declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind read define void @test_x86_sse_ldmxcsr(i8* %a0) { - ; CHECK: movl - ; CHECK: ldmxcsr +; SSE-LABEL: test_x86_sse_ldmxcsr: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: ldmxcsr (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ldmxcsr: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vldmxcsr (%eax) +; KNL-NEXT: retl call void @llvm.x86.sse.ldmxcsr(i8* %a0) ret void } @@ -128,7 +264,15 @@ declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: maxps +; SSE-LABEL: test_x86_sse_max_ps: +; SSE: ## BB#0: +; SSE-NEXT: maxps %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_max_ps: +; KNL: ## BB#0: +; KNL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -136,7 +280,15 @@ declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: maxss +; SSE-LABEL: test_x86_sse_max_ss: +; SSE: ## BB#0: +; SSE-NEXT: maxss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_max_ss: +; KNL: ## BB#0: +; KNL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -144,7 +296,15 @@ declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: minps +; SSE-LABEL: test_x86_sse_min_ps: +; SSE: ## BB#0: +; SSE-NEXT: minps %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_min_ps: +; KNL: ## BB#0: +; KNL-NEXT: vminps %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -152,7 +312,15 @@ declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: minss +; SSE-LABEL: test_x86_sse_min_ss: +; SSE: ## BB#0: +; SSE-NEXT: minss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_min_ss: +; KNL: ## BB#0: +; KNL-NEXT: vminss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -160,7 +328,15 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind read define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { - ; CHECK: movmskps +; SSE-LABEL: test_x86_sse_movmsk_ps: +; SSE: ## BB#0: +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_movmsk_ps: +; KNL: ## BB#0: +; KNL-NEXT: vmovmskps %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1] ret i32 %res } @@ -169,7 +345,15 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: mulss +; SSE-LABEL: test_x86_sse_mul_ss: +; SSE: ## BB#0: +; SSE-NEXT: mulss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_mul_ss: +; KNL: ## BB#0: +; KNL-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -177,7 +361,15 @@ declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { - ; CHECK: rcpps +; SSE-LABEL: test_x86_sse_rcp_ps: +; SSE: ## BB#0: +; SSE-NEXT: rcpps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rcp_ps: +; KNL: ## BB#0: +; KNL-NEXT: vrcpps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -185,7 +377,15 @@ declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { - ; CHECK: rcpss +; SSE-LABEL: test_x86_sse_rcp_ss: +; SSE: ## BB#0: +; SSE-NEXT: rcpss %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rcp_ss: +; KNL: ## BB#0: +; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -193,7 +393,15 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { - ; CHECK: rsqrtps +; SSE-LABEL: test_x86_sse_rsqrt_ps: +; SSE: ## BB#0: +; SSE-NEXT: rsqrtps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rsqrt_ps: +; KNL: ## BB#0: +; KNL-NEXT: vrsqrtps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -201,7 +409,15 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { - ; CHECK: rsqrtss +; SSE-LABEL: test_x86_sse_rsqrt_ss: +; SSE: ## BB#0: +; SSE-NEXT: rsqrtss %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rsqrt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -209,7 +425,15 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { - ; CHECK: sqrtps +; SSE-LABEL: test_x86_sse_sqrt_ps: +; SSE: ## BB#0: +; SSE-NEXT: sqrtps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_sqrt_ps: +; KNL: ## BB#0: +; KNL-NEXT: vsqrtps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -217,7 +441,15 @@ declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { - ; CHECK: sqrtss +; SSE-LABEL: test_x86_sse_sqrt_ss: +; SSE: ## BB#0: +; SSE-NEXT: sqrtss %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_sqrt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -225,25 +457,33 @@ declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone define void @test_x86_sse_stmxcsr(i8* %a0) { - ; CHECK: movl - ; CHECK: stmxcsr +; SSE-LABEL: test_x86_sse_stmxcsr: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: stmxcsr (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_stmxcsr: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vstmxcsr (%eax) +; KNL-NEXT: retl call void @llvm.x86.sse.stmxcsr(i8* %a0) ret void } declare void @llvm.x86.sse.stmxcsr(i8*) nounwind -define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { - ; CHECK: movl - ; CHECK: movups - call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) - ret void -} -declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind - - define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: subss +; SSE-LABEL: test_x86_sse_sub_ss: +; SSE: ## BB#0: +; SSE-NEXT: subss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_sub_ss: +; KNL: ## BB#0: +; KNL-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -251,9 +491,23 @@ declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind read define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: sete - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomieq_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: setnp %al +; SSE-NEXT: sete %cl +; SSE-NEXT: andb %al, %cl +; SSE-NEXT: movzbl %cl, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomieq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: setnp %al +; KNL-NEXT: sete %cl +; KNL-NEXT: andb %al, %cl +; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -261,9 +515,19 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setae - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomige_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: setae %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomige_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: setae %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -271,9 +535,19 @@ declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: seta - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomigt_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: seta %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomigt_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: seta %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -281,9 +555,19 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setbe - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomile_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: ucomiss %xmm0, %xmm1 +; SSE-NEXT: setae %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomile_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vucomiss %xmm0, %xmm1 +; KNL-NEXT: setae %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -291,8 +575,19 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: sbbl +; SSE-LABEL: test_x86_sse_ucomilt_ss: +; SSE: ## BB#0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: ucomiss %xmm0, %xmm1 +; SSE-NEXT: seta %al +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomilt_ss: +; KNL: ## BB#0: +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: vucomiss %xmm0, %xmm1 +; KNL-NEXT: seta %al +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } @@ -300,9 +595,23 @@ declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setne - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomineq_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: setp %al +; SSE-NEXT: setne %cl +; SSE-NEXT: orb %al, %cl +; SSE-NEXT: movzbl %cl, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomineq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: setp %al +; KNL-NEXT: setne %cl +; KNL-NEXT: orb %al, %cl +; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] ret i32 %res } |