diff options
author | Roman Divacky <rdivacky@FreeBSD.org> | 2010-07-13 17:19:57 +0000 |
---|---|---|
committer | Roman Divacky <rdivacky@FreeBSD.org> | 2010-07-13 17:19:57 +0000 |
commit | 66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75 (patch) | |
tree | 9de1c5f67a98cd0e73c60838396486c984f63ac2 /test/CodeGen/X86 | |
parent | abdf259d487163e72081a8cf4991b1617206b41e (diff) | |
download | src-66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75.tar.gz src-66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75.zip |
Update LLVM to r108243.vendor/llvm/llvm-r108243
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=210006
svn path=/vendor/llvm/llvm-r108243/; revision=210077; tag=vendor/llvm/llvm-r108243
Diffstat (limited to 'test/CodeGen/X86')
103 files changed, 1760 insertions, 708 deletions
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll index e839d7295adc..affb7afb1c51 100644 --- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll +++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll @@ -15,14 +15,14 @@ bb77: ; preds = %entry, %entry %tmp99 = udiv i64 0, 0 ; <i64> [#uses=1] %tmp = load i8* null ; <i8> [#uses=1] %tmp114 = icmp eq i64 0, 0 ; <i1> [#uses=1] - br i1 %tmp114, label %cond_true115, label %cond_next136 + br label %cond_true115 bb84: ; preds = %entry ret void cond_true115: ; preds = %bb77 %tmp118 = load i8* null ; <i8> [#uses=1] - br i1 false, label %cond_next129, label %cond_true120 + br label %cond_true120 cond_true120: ; preds = %cond_true115 %tmp127 = udiv i8 %tmp, %tmp118 ; <i8> [#uses=1] @@ -30,7 +30,7 @@ cond_true120: ; preds = %cond_true115 br label %cond_next129 cond_next129: ; preds = %cond_true120, %cond_true115 - %iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ], [ 0, %cond_true115 ] ; <i64> [#uses=1] + %iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ] ; <i64> [#uses=1] %tmp132 = icmp eq i64 %iftmp.30.0, %tmp99 ; <i1> [#uses=1] br i1 %tmp132, label %cond_false148, label %cond_next136 diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll index ef19d72150a0..6f8b89c3240d 100644 --- a/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -11,12 +11,12 @@ define float @foo(float %x) nounwind { %tmp14 = fadd float %tmp12, %tmp7 ret float %tmp14 -; CHECK: mulss LCPI0_0(%rip) -; CHECK: mulss LCPI0_1(%rip) +; CHECK: mulss +; CHECK: mulss ; CHECK: addss -; CHECK: mulss LCPI0_2(%rip) +; CHECK: mulss ; CHECK: addss -; CHECK: mulss LCPI0_3(%rip) +; CHECK: mulss ; CHECK: addss ; CHECK: ret } diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll index fc11347224be..db13fde9f677 100644 --- a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll +++ 
b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll @@ -9,10 +9,7 @@ cond_next127: ; preds = %cond_next391, %entry %tmp149 = mul i32 0, %v.1 ; <i32> [#uses=0] %tmp254 = and i32 0, 15 ; <i32> [#uses=1] %tmp256 = and i32 0, 15 ; <i32> [#uses=2] - br i1 false, label %cond_true267, label %cond_next391 - -cond_true267: ; preds = %cond_next127 - ret i16 0 + br label %cond_next391 cond_next391: ; preds = %cond_next127 %tmp393 = load i32* %ss, align 4 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll index 8e586a7059eb..228a915e3e5a 100644 --- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll +++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -relocation-model=static | grep {foo _str$} +; RUN: llc < %s -relocation-model=static | grep {foo str$} ; PR1761 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-apple-darwin8" +target triple = "x86_64-pc-linux" @str = internal constant [12 x i8] c"init/main.c\00" ; <[12 x i8]*> [#uses=1] define i32 @unknown_bootoption() { diff --git a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll deleted file mode 100644 index 9c004f946b4a..000000000000 --- a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=att | grep movl | count 2 -; RUN: llc < %s -march=x86 -x86-asm-syntax=att | not grep movb - - %struct.double_int = type { i64, i64 } - %struct.tree_common = type <{ i8, [3 x i8] }> - %struct.tree_int_cst = type { %struct.tree_common, %struct.double_int } - %struct.tree_node = type { %struct.tree_int_cst } -@tree_code_type = external constant [0 x i32] ; <[0 x i32]*> [#uses=1] - -define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind { -entry: 
- %tmp2526 = bitcast %struct.tree_node* %t1 to i32* ; <i32*> [#uses=1] - br i1 false, label %UnifiedReturnBlock, label %bb21 - -bb21: ; preds = %entry - %tmp27 = load i32* %tmp2526, align 4 ; <i32> [#uses=1] - %tmp29 = and i32 %tmp27, 255 ; <i32> [#uses=3] - %tmp2930 = trunc i32 %tmp29 to i8 ; <i8> [#uses=1] - %tmp37 = load i32* null, align 4 ; <i32> [#uses=1] - %tmp39 = and i32 %tmp37, 255 ; <i32> [#uses=2] - %tmp3940 = trunc i32 %tmp39 to i8 ; <i8> [#uses=1] - %tmp43 = add i32 %tmp29, -3 ; <i32> [#uses=1] - %tmp44 = icmp ult i32 %tmp43, 3 ; <i1> [#uses=1] - br i1 %tmp44, label %bb47.split, label %bb76 - -bb47.split: ; preds = %bb21 - ret i32 0 - -bb76: ; preds = %bb21 - br i1 false, label %bb82, label %bb146.split - -bb82: ; preds = %bb76 - %tmp94 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp39 ; <i32*> [#uses=1] - %tmp95 = load i32* %tmp94, align 4 ; <i32> [#uses=1] - %tmp9596 = trunc i32 %tmp95 to i8 ; <i8> [#uses=1] - %tmp98 = add i8 %tmp9596, -4 ; <i8> [#uses=1] - %tmp99 = icmp ugt i8 %tmp98, 5 ; <i1> [#uses=1] - br i1 %tmp99, label %bb102, label %bb106 - -bb102: ; preds = %bb82 - ret i32 0 - -bb106: ; preds = %bb82 - ret i32 0 - -bb146.split: ; preds = %bb76 - %tmp149 = icmp eq i8 %tmp2930, %tmp3940 ; <i1> [#uses=1] - br i1 %tmp149, label %bb153, label %UnifiedReturnBlock - -bb153: ; preds = %bb146.split - switch i32 %tmp29, label %UnifiedReturnBlock [ - i32 0, label %bb155 - i32 1, label %bb187 - ] - -bb155: ; preds = %bb153 - ret i32 0 - -bb187: ; preds = %bb153 - %tmp198 = icmp eq %struct.tree_node* %t1, %t2 ; <i1> [#uses=1] - br i1 %tmp198, label %bb201, label %UnifiedReturnBlock - -bb201: ; preds = %bb187 - ret i32 0 - -UnifiedReturnBlock: ; preds = %bb187, %bb153, %bb146.split, %entry - ret i32 0 -} diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll index 8aabb528a49c..0091397ca6b0 100644 --- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll +++ 
b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=local ; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) { diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll index d294885f8255..da029079c6ff 100644 --- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll +++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -regalloc=local -march=x86 -mattr=+mmx | grep esi ; RUN: llc < %s -regalloc=fast -march=x86 -mattr=+mmx | grep esi ; PR2082 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of diff --git a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll index cd2d609b5356..40aafb4c54d5 100644 --- a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll +++ b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills} ; PR2134 declare fastcc i8* @w_addchar(i8*, i32*, i32*, i8 signext ) nounwind diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll index ccc4d754c1f5..3ae502619725 100644 --- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll @@ -19,7 +19,7 @@ bb33: ; preds = %entry br i1 %tmp35, label %bb38, label %bb87.preheader bb38: ; preds = %bb33 %tmp53 = add i32 %tmp19, %delta ; <i32> [#uses=2] - br i1 false, label %bb50, label %bb43 + br label %bb43 bb43: ; preds = %bb38 store i32 %tmp53, i32* null, align 4 ret void diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll index 
41fbdd19f2b2..f4b2d719ae14 100644 --- a/test/CodeGen/X86/2008-04-09-BranchFolding.ll +++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll @@ -10,7 +10,7 @@ define fastcc %struct.tree_node* @pushdecl(%struct.tree_node* %x) nounwind { entry: %tmp3.i40 = icmp eq %struct.binding_level* null, null ; <i1> [#uses=2] - br i1 false, label %bb143, label %bb140 + br label %bb140 bb140: ; preds = %entry br i1 %tmp3.i40, label %bb160, label %bb17.i bb17.i: ; preds = %bb140 diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll index 716563b15811..0742371dc9ba 100644 --- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll +++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local ; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=fast ; PR5534 diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll index 9cf50f4bfc58..e5dda4ac754c 100644 --- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -O0 -fast-isel=false | grep mov | count 5 +; RUN: llc < %s -march=x86 -O0 -fast-isel=false -regalloc=linearscan | grep mov | count 5 ; PR2343 %llvm.dbg.anchor.type = type { i32, i32 } diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll index 5929aff3da55..94c95d40c65e 100644 --- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll +++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=local ; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*] ; <[5 x i32 (...)*]*> [#uses=1] diff --git 
a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll index eacb4a51c215..ce9e389fb35c 100644 --- a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll +++ b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll @@ -7,7 +7,7 @@ define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind { entry: - br i1 false, label %bb151, label %bb163 + br label %bb163 bb151: ; preds = %entry br label %bb163 @@ -19,13 +19,13 @@ bb163: ; preds = %bb151, %entry br label %bb5.i bb5.i: ; preds = %bb5.i57.i, %bb163 - %b.0.i = phi %struct.Bigint* [ null, %bb163 ], [ %tmp9.i.i41.i, %bb5.i57.i ] ; <%struct.Bigint*> [#uses=1] + %b.0.i = phi %struct.Bigint* [ null, %bb163 ] ; <%struct.Bigint*> [#uses=1] %tmp3.i7.i728 = load i32* null, align 4 ; <i32> [#uses=1] br label %bb.i27.i bb.i27.i: ; preds = %bb.i27.i, %bb5.i %tmp23.i20.i = lshr i32 0, 16 ; <i32> [#uses=1] - br i1 false, label %bb.i27.i, label %bb5.i57.i + br label %bb5.i57.i bb5.i57.i: ; preds = %bb.i27.i %tmp50.i35.i = load i32* null, align 4 ; <i32> [#uses=1] @@ -41,7 +41,7 @@ bb5.i57.i: ; preds = %bb.i27.i store i32 %tmp23.i20.i, i32* null, align 4 %tmp74.i61.i = add i32 %tmp3.i7.i728, 1 ; <i32> [#uses=1] store i32 %tmp74.i61.i, i32* null, align 4 - br i1 false, label %bb5.i, label %bb7.i + br label %bb7.i bb7.i: ; preds = %bb5.i57.i %tmp514 = load i32* null, align 4 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll index dd8333608260..3c64fe45c997 100644 --- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll +++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -march=x86 | FileCheck %s -; RUN: llc < %s -march=x86 -regalloc=local | FileCheck %s ; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s ; %0 must not be put in EAX or EDX. 
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll index 440094058c24..eadfda0394dd 100644 --- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll +++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)" -; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)" -; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)" +; RUN: llc < %s -march=x86 | grep "#%ebp %edi %ebx 8(%esi) %eax %dl" +; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%ebx %esi %edi 8(%ebp) %eax %dl" ; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll deleted file mode 100644 index 87305a0b3116..000000000000 --- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll +++ /dev/null @@ -1,160 +0,0 @@ -; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 41 -; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s - - %struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 } - %struct.XXDAlphaTest = type { float, i16, i8, i8 } - %struct.XXDArrayRange = type { i8, i8, i8, i8 } - %struct.XXDBlendMode = type { i16, i16, i16, i16, %struct.XXTColor4, i16, i16, i8, i8, i8, i8 } - %struct.XXDClearColor = type { double, %struct.XXTColor4, %struct.XXTColor4, float, i32 } - %struct.XXDClipPlane = type { i32, [6 x %struct.XXTColor4] } - %struct.XXDColorBuffer = type { i16, i8, i8, [8 x i16], i8, i8, i8, i8 } - %struct.XXDColorMatrix = type { [16 x float]*, %struct.XXDImagingCC } - %struct.XXDConvolution = type { %struct.XXTColor4, %struct.XXDImagingCC, i16, i16, [0 x i32], float*, i32, i32 } - %struct.XXDDepthTest = 
type { i16, i16, i8, i8, i8, i8, double, double } - %struct.XXDFixedFunction = type { %struct.YYToken* } - %struct.XXDFogMode = type { %struct.XXTColor4, float, float, float, float, float, i16, i16, i16, i8, i8 } - %struct.XXDHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 } - %struct.XXDHistogram = type { %struct.XXTFixedColor4*, i32, i16, i8, i8 } - %struct.XXDImagingCC = type { { float, float }, { float, float }, { float, float }, { float, float } } - %struct.XXDImagingSubset = type { %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDColorMatrix, %struct.XXDMinmax, %struct.XXDHistogram, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, i32, [0 x i32] } - %struct.XXDLight = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTCoord3, float, float, float, float, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, float, float, float, float } - %struct.XXDLightModel = type { %struct.XXTColor4, [8 x %struct.XXDLight], [2 x %struct.XXDMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 } - %struct.XXDLightProduct = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4 } - %struct.XXDLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 } - %struct.XXDLogicOp = type { i16, i8, i8 } - %struct.XXDMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 } - %struct.XXDMaterial = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, float, float, float, float, [8 x %struct.XXDLightProduct], %struct.XXTColor4, [8 x i32] } - %struct.XXDMinmax = type { %struct.XXDMinmaxTable*, i16, i8, i8, [0 x i32] } - %struct.XXDMinmaxTable = type { %struct.XXTColor4, %struct.XXTColor4 } - %struct.XXDMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 } - %struct.XXDPipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.XXTColor4* } - %struct.XXDPixelMap = type { 
i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } - %struct.XXDPixelMode = type { float, float, %struct.XXDPixelStore, %struct.XXDPixelTransfer, %struct.XXDPixelMap, %struct.XXDImagingSubset, i32, i32 } - %struct.XXDPixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 } - %struct.XXDPixelStore = type { %struct.XXDPixelPack, %struct.XXDPixelPack } - %struct.XXDPixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float } - %struct.XXDPointMode = type { float, float, float, float, %struct.XXTCoord3, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 } - %struct.XXDPolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 } - %struct.XXDRegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.XXTColor4], [8 x %struct.XXDRegisterCombinersPerStageState], %struct.XXDRegisterCombinersFinalStageState } - %struct.XXDRegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XXDRegisterCombinersPerVariableState] } - %struct.XXDRegisterCombinersPerPortionState = type { [4 x %struct.XXDRegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 } - %struct.XXDRegisterCombinersPerStageState = type { [2 x %struct.XXDRegisterCombinersPerPortionState], [2 x %struct.XXTColor4] } - %struct.XXDRegisterCombinersPerVariableState = type { i16, i16, i16, i16 } - %struct.XXDScissorTest = type { %struct.XXTFixedColor4, i8, i8, i8, i8 } - %struct.XXDState = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.XXTColor4], [128 x %struct.XXTColor4], %struct.XXDViewport, %struct.XXDTransform, %struct.XXDLightModel, %struct.XXDActiveTextureTargets, %struct.XXDAlphaTest, %struct.XXDBlendMode, %struct.XXDClearColor, %struct.XXDColorBuffer, %struct.XXDDepthTest, %struct.XXDArrayRange, %struct.XXDFogMode, 
%struct.XXDHintMode, %struct.XXDLineMode, %struct.XXDLogicOp, %struct.XXDMaskMode, %struct.XXDPixelMode, %struct.XXDPointMode, %struct.XXDPolygonMode, %struct.XXDScissorTest, i32, %struct.XXDStencilTest, [8 x %struct.XXDTextureMode], [16 x %struct.XXDTextureImageMode], %struct.XXDArrayRange, [8 x %struct.XXDTextureCoordGen], %struct.XXDClipPlane, %struct.XXDMultisample, %struct.XXDRegisterCombiners, %struct.XXDArrayRange, %struct.XXDArrayRange, [3 x %struct.XXDPipelineProgramState], %struct.XXDArrayRange, %struct.XXDTransformFeedback, i32*, %struct.XXDFixedFunction, [3 x i32], [2 x i32] }> - %struct.XXDStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] } - %struct.XXDTextureCoordGen = type { { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, i8, i8, i8, i8 } - %struct.XXDTextureImageMode = type { float } - %struct.XXDTextureMode = type { %struct.XXTColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float } - %struct.XXDTextureRec = type opaque - %struct.XXDTransform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }> - %struct.XXDTransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] } - %struct.XXDViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float } - %struct.XXTColor4 = type { float, float, float, float } - %struct.XXTCoord3 = type { float, float, float } - %struct.XXTFixedColor4 = type { i32, i32, i32, i32 } - %struct.XXVMTextures = type { [16 x %struct.XXDTextureRec*] 
} - %struct.XXVMVPContext = type { i32 } - %struct.XXVMVPStack = type { i32, i32 } - %struct.YYToken = type { { i16, i16, i32 } } - %struct._XXVMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [4096 x i8], [8 x float], [48 x float], [128 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } } -@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.XXDState*, <4 x float>*, <4 x float>**, %struct._XXVMConstants*, %struct.YYToken*, %struct.XXVMVPContext*, %struct.XXVMTextures*, %struct.XXVMVPStack*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, [4 x <4 x float>]*, i32*, <4 x i32>*, i64)* @t to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind { -entry: -; CHECK: t: - %0 = trunc i64 %key_token to i32 ; <i32> [#uses=1] - %1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5] - br label %bb1132 - -bb51: ; preds = %bb1132 -; CHECK: .align 4 -; CHECK: xorl %ecx, %ecx -; CHECK: andl $7 - %2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; <i16*> [#uses=1] - %3 = load i16* %2, align 1 ; <i16> [#uses=3] - %4 = lshr i16 %3, 6 ; <i16> [#uses=1] - %5 = trunc i16 %4 to i8 ; <i8> [#uses=1] - %6 = zext i8 %5 to i32 ; 
<i32> [#uses=1] - %7 = trunc i16 %3 to i8 ; <i8> [#uses=1] - %8 = and i8 %7, 7 ; <i8> [#uses=1] - %mask5556 = zext i8 %8 to i32 ; <i32> [#uses=3] - %.sum1324 = add i32 %mask5556, 2 ; <i32> [#uses=1] - %.rec = add i32 %operation.0.rec, %.sum1324 ; <i32> [#uses=1] - %9 = bitcast %struct.YYToken* %operation.0 to i32* ; <i32*> [#uses=1] - %10 = load i32* %9, align 1 ; <i32> [#uses=1] - %11 = lshr i32 %10, 16 ; <i32> [#uses=2] - %12 = trunc i32 %11 to i8 ; <i8> [#uses=1] - %13 = and i8 %12, 1 ; <i8> [#uses=1] - %14 = lshr i16 %3, 15 ; <i16> [#uses=1] - %15 = trunc i16 %14 to i8 ; <i8> [#uses=1] - %16 = or i8 %13, %15 ; <i8> [#uses=1] - %17 = icmp eq i8 %16, 0 ; <i1> [#uses=1] - br i1 %17, label %bb94, label %bb75 - -bb75: ; preds = %bb51 - %18 = getelementptr %struct.YYToken* %1, i32 0, i32 0, i32 0 ; <i16*> [#uses=1] - %19 = load i16* %18, align 4 ; <i16> [#uses=1] - %20 = load i16* null, align 2 ; <i16> [#uses=1] - %21 = zext i16 %19 to i64 ; <i64> [#uses=1] - %22 = zext i16 %20 to i64 ; <i64> [#uses=1] - %23 = shl i64 %22, 16 ; <i64> [#uses=1] - %.ins1177 = or i64 %23, %21 ; <i64> [#uses=1] - %.ins1175 = or i64 %.ins1177, 0 ; <i64> [#uses=1] - %24 = and i32 %11, 1 ; <i32> [#uses=1] - %.neg1333 = sub i32 %mask5556, %24 ; <i32> [#uses=1] - %.neg1335 = sub i32 %.neg1333, 0 ; <i32> [#uses=1] - %25 = sub i32 %.neg1335, 0 ; <i32> [#uses=1] - br label %bb94 - -bb94: ; preds = %bb75, %bb51 - %extraToken.0 = phi i64 [ %.ins1175, %bb75 ], [ %extraToken.1, %bb51 ] ; <i64> [#uses=1] - %argCount.0 = phi i32 [ %25, %bb75 ], [ %mask5556, %bb51 ] ; <i32> [#uses=1] - %operation.0.sum1392 = add i32 %operation.0.rec, 1 ; <i32> [#uses=2] - %26 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 0 ; <i16*> [#uses=1] - %27 = load i16* %26, align 4 ; <i16> [#uses=1] - %28 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 1 ; <i16*> [#uses=1] - %29 = load i16* %28, align 2 ; <i16> [#uses=1] - store i16 %27, i16* null, align 8 - store i16 %29, 
i16* null, align 2 - br i1 false, label %bb1132, label %bb110 - -bb110: ; preds = %bb94 - switch i32 %6, label %bb1078 [ - i32 30, label %bb960 - i32 32, label %bb801 - i32 38, label %bb809 - i32 78, label %bb1066 - ] - -bb801: ; preds = %bb110 - unreachable - -bb809: ; preds = %bb110 - unreachable - -bb960: ; preds = %bb110 - %30 = icmp eq i32 %argCount.0, 1 ; <i1> [#uses=1] - br i1 %30, label %bb962, label %bb965 - -bb962: ; preds = %bb960 - unreachable - -bb965: ; preds = %bb960 - unreachable - -bb1066: ; preds = %bb110 - unreachable - -bb1078: ; preds = %bb110 - unreachable - -bb1132: ; preds = %bb94, %entry - %extraToken.1 = phi i64 [ undef, %entry ], [ %extraToken.0, %bb94 ] ; <i64> [#uses=1] - %operation.0.rec = phi i32 [ 0, %entry ], [ %.rec, %bb94 ] ; <i32> [#uses=4] - %operation.0 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec ; <%struct.YYToken*> [#uses=1] - br i1 false, label %bb1134, label %bb51 - -bb1134: ; preds = %bb1132 - ret void -} diff --git a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll deleted file mode 100644 index 27a7113ffd56..000000000000 --- a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll +++ /dev/null @@ -1,84 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2 -; PR3311 - - %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } - %struct.VEC_basic_block_base = type { i32, i32, [1 x %struct.basic_block_def*] } - %struct.VEC_basic_block_gc = type { %struct.VEC_basic_block_base } - %struct.VEC_edge_base = type { i32, i32, [1 x %struct.edge_def*] } - %struct.VEC_edge_gc = type { %struct.VEC_edge_base } - %struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] } - %struct.VEC_rtx_gc = type { %struct.VEC_rtx_base } - %struct.VEC_temp_slot_p_base = type { i32, i32, [1 x %struct.temp_slot*] } - %struct.VEC_temp_slot_p_gc = type { %struct.VEC_temp_slot_p_base } - %struct.VEC_tree_base = type { i32, i32, [1 x 
%struct.tree_node*] } - %struct.VEC_tree_gc = type { %struct.VEC_tree_base } - %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } - %struct.basic_block_def = type { %struct.tree_node*, %struct.VEC_edge_gc*, %struct.VEC_edge_gc*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_il_dependent, %struct.tree_node*, %struct.edge_prediction*, i64, i32, i32, i32, i32 } - %struct.basic_block_il_dependent = type { %struct.rtl_bb_info* } - %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [2 x i64] } - %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* } - %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack } - %struct.block_symbol = type { [3 x %struct.rtunion], %struct.object_block*, i64 } - %struct.c_arg_info = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i8 } - %struct.c_language_function = type { %struct.stmt_tree_s } - %struct.c_switch = type opaque - %struct.control_flow_graph = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.VEC_basic_block_gc*, i32, i32, i32, %struct.VEC_basic_block_gc*, i32 } - %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 } - %struct.edge_def_insns = type { %struct.rtx_def* } - %struct.edge_prediction = type opaque - %struct.eh_status = type opaque - %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } - %struct.et_node = type opaque - %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } - %struct.function = type { %struct.eh_status*, %struct.expr_status*, 
%struct.emit_status*, %struct.varasm_status*, %struct.control_flow_graph*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.VEC_temp_slot_p_gc*, %struct.temp_slot*, %struct.var_refs_queue*, i32, i32, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.htab*, %struct.rtx_def*, i32, i32, i32, %struct.location_t, %struct.VEC_tree_gc*, %struct.tree_node*, i8*, i8*, i8*, i8*, i8*, %struct.tree_node*, i8, i8, i8, i8, i8, i8 } - %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 } - %struct.initial_value_struct = type opaque - %struct.lang_decl = type { i8 } - %struct.language_function = type { %struct.c_language_function, %struct.tree_node*, %struct.tree_node*, %struct.c_switch*, %struct.c_arg_info*, i32, i32, i32, i32 } - %struct.location_t = type { i8*, i32 } - %struct.loop = type opaque - %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, [4 x i32], i32, i32, i32 } - %struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* } - %struct.obstack = type { i64, %struct._obstack_chunk*, i8*, i8*, i8*, i64, i32, %struct._obstack_chunk* (i8*, i64)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 } - %struct.omp_clause_subcode = type { i32 } - %struct.rtl_bb_info = type { %struct.rtx_def*, %struct.rtx_def*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, %struct.rtx_def*, %struct.rtx_def*, i32 } - %struct.rtunion = type { i8* } - %struct.rtx_def = type { i16, i8, i8, %struct.u } - %struct.section = type { %struct.unnamed_section } - 
%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } - %struct.stack_local_entry = type opaque - %struct.stmt_tree_s = type { %struct.tree_node*, i32 } - %struct.temp_slot = type opaque - %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 } - %struct.tree_decl_common = type { %struct.tree_decl_minimal, %struct.tree_node*, i8, i8, i8, i8, i8, i32, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } - %struct.tree_decl_minimal = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, %struct.tree_node* } - %struct.tree_decl_non_common = type { %struct.tree_decl_with_vis, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* } - %struct.tree_decl_with_rtl = type { %struct.tree_decl_common, %struct.rtx_def*, i32 } - %struct.tree_decl_with_vis = type { %struct.tree_decl_with_rtl, %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 } - %struct.tree_function_decl = type { %struct.tree_decl_non_common, i32, i8, i8, i64, %struct.function* } - %struct.tree_node = type { %struct.tree_function_decl } - %struct.u = type { %struct.block_symbol } - %struct.unnamed_section = type { %struct.omp_clause_subcode, void (i8*)*, i8*, %struct.section* } - %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } - %struct.varasm_status = type opaque - %union.tree_ann_d = type opaque -@.str1 = external constant [31 x i8] ; <[31 x i8]*> [#uses=1] -@integer_types = external global [11 x %struct.tree_node*] ; <[11 x %struct.tree_node*]*> [#uses=1] -@__FUNCTION__.31164 = external constant [23 x i8], align 16 ; <[23 x i8]*> [#uses=1] -@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @c_common_type_for_size to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -define i32 @c_common_type_for_size(i32 %bits, i32 %unsignedp) nounwind { -entry: 
- %0 = load %struct.tree_node** getelementptr ([11 x %struct.tree_node*]* @integer_types, i32 0, i64 5), align 8 ; <%struct.tree_node*> [#uses=1] - br i1 false, label %bb16, label %bb - -bb: ; preds = %entry - tail call void @tree_class_check_failed(%struct.tree_node* %0, i32 2, i8* getelementptr ([31 x i8]* @.str1, i32 0, i64 0), i32 1785, i8* getelementptr ([23 x i8]* @__FUNCTION__.31164, i32 0, i32 0)) noreturn nounwind - unreachable - -bb16: ; preds = %entry - %tmp = add i32 %bits, %unsignedp ; <i32> [#uses=1] - ret i32 %tmp -} - -declare void @tree_class_check_failed(%struct.tree_node*, i32, i8*, i32, i8*) noreturn diff --git a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll index 21b43fb2ddb0..35fac0c02a1e 100644 --- a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll +++ b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim ; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=fast -disable-fp-elim ; rdar://6538384 diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll index e5d46f98fc2d..bed863e405a8 100644 --- a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil ; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=fast | not grep sil ; rdar://6787136 diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll index d7b9463b5e1b..fcb2ed07dc13 100644 --- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll +++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 83 +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep 
asm-printer | grep 82 ; rdar://6802189 ; Test if linearscan is unfavoring registers for allocation to allow more reuse diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll index c1ec45fc007e..757042e5be42 100644 --- a/test/CodeGen/X86/2009-04-24.ll +++ b/test/CodeGen/X86/2009-04-24.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t -; RUN: grep {leal.*TLSGD.*___tls_get_addr} %t -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2 +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2 ; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2 ; PR4004 diff --git a/test/CodeGen/X86/2009-08-23-linkerprivate.ll b/test/CodeGen/X86/2009-08-23-linkerprivate.ll index 3da8f00a6043..90fac15442aa 100644 --- a/test/CodeGen/X86/2009-08-23-linkerprivate.ll +++ b/test/CodeGen/X86/2009-08-23-linkerprivate.ll @@ -2,7 +2,7 @@ ; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm' -@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0] +@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; CHECK: .globl l_objc_msgSend_fixup_alloc ; CHECK: .weak_definition l_objc_msgSend_fixup_alloc diff --git a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll deleted file mode 100644 index 41b4bc087279..000000000000 --- a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-freebsd7.2 -code-model=kernel | FileCheck %s -; PR4689 - -%struct.__s = type { [8 x i8] } -%struct.pcb = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i16, i8* } -%struct.pcpu = type { i32*, i32*, i32*, i32*, 
%struct.pcb*, i64, i32, i32, i32, i32 } - -define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat { -; CHECK: hammer_time: -; CHECK: movq $Xrsvd, %rax -; CHECK: movq $Xrsvd, %rcx -entry: - br i1 undef, label %if.then, label %if.end - -if.then: ; preds = %entry - br label %if.end - -if.end: ; preds = %if.then, %entry - br label %for.body - -for.body: ; preds = %for.inc, %if.end - switch i32 undef, label %if.then76 [ - i32 9, label %for.inc - i32 10, label %for.inc - i32 11, label %for.inc - i32 12, label %for.inc - ] - -if.then76: ; preds = %for.body - unreachable - -for.inc: ; preds = %for.body, %for.body, %for.body, %for.body - br i1 undef, label %for.end, label %for.body - -for.end: ; preds = %for.inc - call void asm sideeffect "mov $1,%gs:$0", "=*m,r,~{dirflag},~{fpsr},~{flags}"(%struct.__s* bitcast (%struct.pcb** getelementptr (%struct.pcpu* null, i32 0, i32 4) to %struct.__s*), i64 undef) nounwind - br label %for.body170 - -for.body170: ; preds = %for.body170, %for.end - store i64 or (i64 and (i64 or (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 2097152), i64 2162687), i64 or (i64 or (i64 and (i64 shl (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 32), i64 -281474976710656), i64 140737488355328), i64 15393162788864)), i64* undef - br i1 undef, label %for.end175, label %for.body170 - -for.end175: ; preds = %for.body170 - unreachable -} - -declare void @Xrsvd(i32, i32, i32, i32) ssp noredzone noimplicitfloat diff --git a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll deleted file mode 100644 index 8cb538b07359..000000000000 --- a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10 -post-RA-scheduler=true | FileCheck %s - -; PR4958 - -define i32 @main() nounwind ssp { -entry: -; CHECK: main: - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - br 
label %bb - -bb: ; preds = %bb1, %entry -; CHECK: addl $1 -; CHECK-NEXT: movl %e -; CHECK-NEXT: adcl $0 - %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ] ; <i64> [#uses=1] - %0 = add nsw i64 %i.0, 1 ; <i64> [#uses=2] - %1 = icmp sgt i32 0, 0 ; <i1> [#uses=1] - br i1 %1, label %bb2, label %bb1 - -bb1: ; preds = %bb - %2 = icmp sle i64 %0, 1 ; <i1> [#uses=1] - br i1 %2, label %bb, label %bb2 - -bb2: ; preds = %bb1, %bb - br label %return - -return: ; preds = %bb2 - ret i32 0 -} diff --git a/test/CodeGen/X86/2009-12-12-CoalescerBug.ll b/test/CodeGen/X86/2009-12-12-CoalescerBug.ll deleted file mode 100644 index 4e8f5fdc530d..000000000000 --- a/test/CodeGen/X86/2009-12-12-CoalescerBug.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s - -define i32 @do_loop(i32* nocapture %sdp, i32* nocapture %ddp, i8* %mdp, i8* nocapture %cdp, i32 %w) nounwind readonly optsize ssp { -entry: - br label %bb - -bb: ; preds = %bb5, %entry - %mask.1.in = load i8* undef, align 1 ; <i8> [#uses=3] - %0 = icmp eq i8 %mask.1.in, 0 ; <i1> [#uses=1] - br i1 %0, label %bb5, label %bb1 - -bb1: ; preds = %bb - br i1 undef, label %bb2, label %bb3 - -bb2: ; preds = %bb1 -; CHECK: %bb2 -; CHECK: movb %ch, %al - %1 = zext i8 %mask.1.in to i32 ; <i32> [#uses=1] - %2 = zext i8 undef to i32 ; <i32> [#uses=1] - %3 = mul i32 %2, %1 ; <i32> [#uses=1] - %4 = add i32 %3, 1 ; <i32> [#uses=1] - %5 = add i32 %4, 0 ; <i32> [#uses=1] - %6 = lshr i32 %5, 8 ; <i32> [#uses=1] - %retval12.i = trunc i32 %6 to i8 ; <i8> [#uses=1] - br label %bb3 - -bb3: ; preds = %bb2, %bb1 - %mask.0.in = phi i8 [ %retval12.i, %bb2 ], [ %mask.1.in, %bb1 ] ; <i8> [#uses=1] - %7 = icmp eq i8 %mask.0.in, 0 ; <i1> [#uses=1] - br i1 %7, label %bb5, label %bb4 - -bb4: ; preds = %bb3 - br label %bb5 - -bb5: ; preds = %bb4, %bb3, %bb - br i1 undef, label %bb6, label %bb - -bb6: ; preds = %bb5 - ret i32 undef -} diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll 
b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll index eb21dc234a0d..7325f4ae1251 100644 --- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll +++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -9,11 +9,11 @@ ; lowering of arguments potentially overwrites the value. ; ; Move return address (76(%esp)) to a temporary register (%ebp) -; CHECK: movl 76(%esp), %ebp +; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]] ; Overwrite return address -; CHECK: movl %ecx, 76(%esp) +; CHECK: movl %ebx, 76(%esp) ; Move return address from temporary register (%ebp) to new stack location (60(%esp)) -; CHECK: movl %ebp, 60(%esp) +; CHECK: movl [[REGISTER]], 60(%esp) %tupl_p = type [9 x i32]* diff --git a/test/CodeGen/X86/2010-03-17-ISelBug.ll b/test/CodeGen/X86/2010-03-17-ISelBug.ll index 609b4e24900b..ba21902f7d0a 100644 --- a/test/CodeGen/X86/2010-03-17-ISelBug.ll +++ b/test/CodeGen/X86/2010-03-17-ISelBug.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin5 + ; rdar://7761790 %"struct..0$_485" = type { i16, i16, i32 } @@ -37,3 +38,30 @@ bb169: ; preds = %bb169, %bb.nph380 %4 = add nsw i32 %index.6379, 1 ; <i32> [#uses=1] br label %bb169 } + +; PR7368 + +%struct.bufBit_s = type { i8*, i8 } + +define fastcc void @printSwipe([2 x [256 x %struct.bufBit_s]]* nocapture %colourLines) nounwind { +entry: + br label %for.body190 + +for.body261.i: ; preds = %for.body261.i, %for.body190 + %line.3300.i = phi i32 [ undef, %for.body190 ], [ %add292.i, %for.body261.i ] ; <i32> [#uses=3] + %conv268.i = and i32 %line.3300.i, 255 ; <i32> [#uses=1] + %tmp278.i = getelementptr [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 %conv268.i, i32 0 ; <i8**> [#uses=1] + store i8* undef, i8** %tmp278.i + %tmp338 = shl i32 %line.3300.i, 3 ; <i32> [#uses=1] + %tmp339 = and i32 %tmp338, 2040 ; <i32> [#uses=1] + %tmp285.i = getelementptr i8* %scevgep328, i32 %tmp339 ; <i8*> [#uses=1] + store i8 undef, i8* %tmp285.i + %add292.i = add nsw i32 0, %line.3300.i ; <i32> 
[#uses=1] + br i1 undef, label %for.body190, label %for.body261.i + +for.body190: ; preds = %for.body261.i, %for.body190, %bb.nph104 + %pen.1100 = phi i32 [ 0, %entry ], [ %inc230, %for.body261.i ], [ %inc230, %for.body190 ] ; <i32> [#uses=3] + %scevgep328 = getelementptr [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 0, i32 1 ; <i8*> [#uses=1] + %inc230 = add i32 %pen.1100, 1 ; <i32> [#uses=2] + br i1 undef, label %for.body190, label %for.body261.i +} diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll index 4c95179350a6..e20f1d8c79ce 100644 --- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll +++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -O0 -regalloc=local -relocation-model=pic -disable-fp-elim | FileCheck %s ; RUN: llc < %s -O0 -regalloc=fast -relocation-model=pic -disable-fp-elim | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin10.0.0" diff --git a/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll index 375f42499cbc..74a5ec28db1e 100644 --- a/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll +++ b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll @@ -1,4 +1,3 @@ -; RUN-XFAIL: llc < %s -O0 -regalloc=local | FileCheck %s ; RUN: llc < %s -O0 -regalloc=fast | FileCheck %s ; PR6520 diff --git a/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll index e554f9feb2f1..90eb84d1dc40 100644 --- a/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll +++ b/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll @@ -1,4 +1,3 @@ -; RUN: llc -regalloc=local %s -o %t ; RUN: llc -regalloc=fast %s -o %t ; PR7066 diff --git 
a/test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll b/test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll new file mode 100644 index 000000000000..7c7792ac65a0 --- /dev/null +++ b/test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic +; PR7313 +; +; The inline asm in this function clobbers almost all allocatable registers. +; Make sure that the register allocator recovers. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +declare void @snapshot() + +define void @test_too_many_longs() nounwind { +entry: + call void asm sideeffect "xor %rax, %rax\0A\09xor %rbx, %rbx\0A\09xor %rcx, %rcx\0A\09xor %rdx, %rdx\0A\09xor %rsi, %rsi\0A\09xor %rdi, %rdi\0A\09xor %r8, %r8\0A\09xor %r9, %r9\0A\09xor %r10, %r10\0A\09xor %r11, %r11\0A\09xor %r12, %r12\0A\09xor %r13, %r13\0A\09xor %r14, %r14\0A\09xor %r15, %r15\0A\09", "~{fpsr},~{flags},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rdi},~{rsi},~{rdx},~{rcx},~{rbx},~{rax}"() nounwind + call void bitcast (void ()* @snapshot to void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)*)(i64 32, i64 33, i64 34, i64 35, i64 36, i64 37, i64 38, i64 39, i64 40, i64 41, i64 42, i64 43) nounwind + ret void +} diff --git a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll new file mode 100644 index 000000000000..b22a391ef358 --- /dev/null +++ b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll @@ -0,0 +1,6 @@ +; RUN: llc -fast-isel -march=x86 < %s | grep %fs: + +define i32 @test1(i32 addrspace(257)* %arg) nounwind { + %tmp = load i32 addrspace(257)* %arg + ret i32 %tmp +} diff --git a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll new file mode 100644 index 000000000000..4639866afc5e 
--- /dev/null +++ b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll @@ -0,0 +1,29 @@ +; RUN: llc -regalloc=fast < %s | FileCheck %s +; PR7382 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private constant [23 x i8] c"This should be -1: %d\0A\00" ; <[23 x i8]*> [#uses=1] + +define i32 @main() { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=3] + %v = alloca i32, align 4 ; <i32*> [#uses=3] + store i32 0, i32* %retval + %zero = load i32* %retval +; The earlyclobber register EC0 should not be spilled before the inline asm. +; Yes, check-not can refer to FileCheck variables defined in the future. +; CHECK-NOT: [[EC0]]{{.*}}(%rsp) +; CHECK: bsr {{[^,]*}}, [[EC0:%...]] + %0 = call i32 asm "bsr $1, $0\0A\09cmovz $2, $0", "=&r,ro,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %zero, i32 -1) nounwind, !srcloc !0 ; <i32> [#uses=1] + store i32 %0, i32* %v + %tmp = load i32* %v ; <i32> [#uses=1] + %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 %tmp) ; <i32> [#uses=0] + store i32 0, i32* %retval + %1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %0 +} + +declare i32 @printf(i8*, ...) 
+ +!0 = metadata !{i32 191} diff --git a/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll b/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll new file mode 100644 index 000000000000..2a938d941e2d --- /dev/null +++ b/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll @@ -0,0 +1,15 @@ +; RUN: llc %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -o /dev/null +; Formerly crashed, rdar://8015842 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +%0 = type { i64, i64, i64, i64, i64 } + +@utcbs.1559 = internal global [3 x i64] zeroinitializer ; <[3 x i64]*> [#uses=1] + +define void @bar() nounwind ssp { +entry: + %asmtmp.i.i = tail call %0 asm sideeffect "push %rbp; syscall; pop %rbp\0A", "={ax},={di},={si},={dx},={bx},{ax},{di},{si},{dx},{bx},~{dirflag},~{fpsr},~{flags},~{memory},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rcx}"(i32 7, i64 -1, i64 0, i64 -1, i64 -1) nounwind ; <%0> [#uses=0] + %asmtmp.i1.i = tail call %0 asm sideeffect "mov $10, %r8;\0Amov $11, %r9;\0Amov $12, %r10;\0Apush %rbp; syscall; pop %rbp\0A", "={ax},={di},={si},={dx},={bx},{ax},{di},{si},{dx},{bx},imr,imr,imr,~{dirflag},~{fpsr},~{flags},~{memory},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rcx}"(i32 8, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 0, i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @utcbs.1559, i64 0, i64 2) to i8*)) nounwind ; <%0> [#uses=0] + ret void +} diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll new file mode 100644 index 000000000000..c6421a247eaa --- /dev/null +++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll @@ -0,0 +1,39 @@ +; RUN: llc -O1 -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s +; <rdar://problem/8124405> + +%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, 
i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* } +%struct.subtype = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* } + +define i32 @func(%struct.type* %s) nounwind optsize ssp { +entry: + %tmp1 = getelementptr inbounds %struct.type* %s, i32 0, i32 1 + %tmp2 = load i32* %tmp1, align 8 + %tmp3 = icmp eq i32 %tmp2, 10 + %tmp4 = getelementptr inbounds %struct.type* %s, i32 0, i32 40 + br i1 %tmp3, label %bb, label %entry.bb1_crit_edge + +entry.bb1_crit_edge: + br label %bb1 + +bb: + +; The point of this code is that %rdi is set to %rdi+64036 for the rep;stosl +; statement. It can be an ADD or LEA instruction, it's not important which one +; it is. 
+; +; CHECK: ## %bb +; CHECK-NEXT: addq $64036, %rdi +; CHECK: rep;stosl + + %tmp5 = bitcast i32* %tmp4 to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false) + %tmp6 = getelementptr inbounds %struct.type* %s, i32 0, i32 62 + store i32* null, i32** %tmp6, align 8 + br label %bb1 + +bb1: + store i32 10, i32* %tmp1, align 8 + ret i32 42 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll b/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll new file mode 100644 index 000000000000..68a6a134de5c --- /dev/null +++ b/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -disable-fp-elim -mtriple=i686-pc-mingw32 + +%struct.__SEH2Frame = type {} + +define void @_SEH2FrameHandler() nounwind { +entry: + %target.addr.i = alloca i8*, align 4 ; <i8**> [#uses=2] + %frame = alloca %struct.__SEH2Frame*, align 4 ; <%struct.__SEH2Frame**> [#uses=1] + %tmp = load %struct.__SEH2Frame** %frame ; <%struct.__SEH2Frame*> [#uses=1] + %conv = bitcast %struct.__SEH2Frame* %tmp to i8* ; <i8*> [#uses=1] + store i8* %conv, i8** %target.addr.i + %tmp.i = load i8** %target.addr.i ; <i8*> [#uses=1] + call void asm sideeffect "push %ebp\0Apush $$0\0Apush $$0\0Apush $$Return${:uid}\0Apush $0\0Acall ${1:c}\0AReturn${:uid}: pop %ebp\0A", "imr,imr,~{ax},~{bx},~{cx},~{dx},~{si},~{di},~{flags},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %tmp.i, void (...)* @RtlUnwind) nounwind, !srcloc !0 + ret void +} + +declare x86_stdcallcc void @RtlUnwind(...) 
+ +!0 = metadata !{i32 215} diff --git a/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll b/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll new file mode 100644 index 000000000000..e1491a03d8a8 --- /dev/null +++ b/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -O0 | FileCheck %s +; PR7509 +target triple = "i386-apple-darwin10" +%asmtype = type { i32, i8*, i32, i32 } + +; Arguments 1 and 4 must be the same. No other output arguments may be +; allocated %eax. + +; CHECK: InlineAsm Start +; CHECK: arg1 %[[A1:...]] +; CHECK-NOT: ax +; CHECK: arg4 %[[A1]] +; CHECK: InlineAsm End + +define i32 @func(i8* %s) nounwind ssp { +entry: + %0 = tail call %asmtype asm "arg0 $0\0A\09arg1 $1\0A\09arg2 $2\0A\09arg3 $3\0A\09arg4 $4", "={ax},=r,=r,=r,1,~{dirflag},~{fpsr},~{flags}"(i8* %s) nounwind, !srcloc !0 ; <%asmtype> [#uses=1] + %asmresult = extractvalue %asmtype %0, 0 ; <i32> [#uses=1] + ret i32 %asmresult +} + +!0 = metadata !{i32 108} diff --git a/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll b/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll new file mode 100644 index 000000000000..82dac9d9930e --- /dev/null +++ b/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 | FileCheck %s +; Any register is OK for %0, but it must be a register, not memory. 
+ +define i32 @foo() nounwind ssp { +entry: +; CHECK: GCROOT %eax + %_r = alloca i32, align 4 ; <i32*> [#uses=2] + call void asm "/* GCROOT $0 */", "=*imr,0,~{dirflag},~{fpsr},~{flags}"(i32* %_r, i32 4) nounwind + %0 = load i32* %_r, align 4 ; <i32> [#uses=1] + ret i32 %0 +} diff --git a/test/CodeGen/X86/2010-07-02-UnfoldBug.ll b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll new file mode 100644 index 000000000000..79219dcfe60a --- /dev/null +++ b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin +; rdar://8154265 + +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + +define void @_ZN2CA3OGL20fill_surface_mesh_3dERNS0_7ContextEPKNS_6Render13MeshTransformEPKNS0_5LayerEPNS0_7SurfaceEfNS0_13TextureFilterESC_f() nounwind optsize ssp { +entry: + br i1 undef, label %bb2.thread, label %bb2 + +bb2.thread: ; preds = %entry + br i1 undef, label %bb41, label %bb10.preheader + +bb2: ; preds = %entry + unreachable + +bb10.preheader: ; preds = %bb2.thread + br i1 undef, label %bb9, label %bb12 + +bb9: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb9, label %bb12 + +bb12: ; preds = %bb9, %bb10.preheader + br i1 undef, label %bb4.i.i, label %bb3.i.i + +bb3.i.i: ; preds = %bb12 + unreachable + +bb4.i.i: ; preds = %bb12 + br i1 undef, label %bb8.i.i, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + +bb8.i.i: ; preds = %bb4.i.i + br i1 undef, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit, label %bb9.i.i + +bb9.i.i: ; preds = %bb8.i.i + br i1 undef, label %bb11.i.i, label %bb10.i.i + +bb10.i.i: ; preds = %bb9.i.i + unreachable + +bb11.i.i: ; preds = %bb9.i.i + unreachable + 
+_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit: ; preds = %bb8.i.i, %bb4.i.i + br i1 undef, label %bb19, label %bb14 + +bb14: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + unreachable + +bb19: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit + br i1 undef, label %bb.i50, label %bb6.i + +bb.i50: ; preds = %bb19 + unreachable + +bb6.i: ; preds = %bb19 + br i1 undef, label %bb28, label %bb.nph106 + +bb22: ; preds = %bb24.preheader + br i1 undef, label %bb2.i.i, label %bb.i.i49 + +bb.i.i49: ; preds = %bb22 + %0 = load float* undef, align 4 ; <float> [#uses=1] + %1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1] + %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1] + %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1] + %4 = extractelement <4 x float> %3, i32 0 ; <float> [#uses=1] + store float %4, float* undef, align 4 + %5 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> undef) nounwind readnone ; <<4 x float>> [#uses=1] + %6 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %5, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1] + %7 = extractelement <4 x float> %6, i32 0 ; <float> [#uses=1] + store float %7, float* undef, align 4 + unreachable + +bb2.i.i: ; preds = %bb22 + unreachable + +bb26.loopexit: ; preds = %bb24.preheader + br i1 undef, label %bb28, label %bb24.preheader + +bb.nph106: ; preds = %bb6.i + br label %bb24.preheader + 
+bb24.preheader: ; preds = %bb.nph106, %bb26.loopexit + br i1 undef, label %bb22, label %bb26.loopexit + +bb28: ; preds = %bb26.loopexit, %bb6.i + unreachable + +bb41: ; preds = %bb2.thread + br i1 undef, label %return, label %bb46 + +bb46: ; preds = %bb41 + ret void + +return: ; preds = %bb41 + ret void +} diff --git a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll new file mode 100644 index 000000000000..cb47d208dd44 --- /dev/null +++ b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s + +define void @foo() nounwind ssp { +entry: +; CHECK: foo +; CHECK: subq $8, %rsp +; CHECK: int $3 + call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + call void asm sideeffect alignstack ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect alignstack ".line 3", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect alignstack "int $$3", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind + br label %return + +return: ; preds = %entry + ret void +} + +define void @bar() nounwind ssp { +entry: +; CHECK: bar +; CHECK-NOT: subq $8, %rsp +; CHECK: int $3 + call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + call void asm sideeffect ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect ".line 3", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect "int $$3", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind + br label %return + +return: ; preds = %entry + ret void +} diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll new file mode 100644 index 000000000000..edd6015b0d28 --- /dev/null +++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll @@ -0,0 +1,29 @@ +; RUN: llc -O0 
-relocation-model pic < %s -o /dev/null +; PR7545 +@.str = private constant [4 x i8] c"one\00", align 1 ; <[4 x i8]*> [#uses=1] +@.str1 = private constant [4 x i8] c"two\00", align 1 ; <[4 x i8]*> [#uses=1] +@C.9.2167 = internal constant [2 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0)] +!38 = metadata !{i32 524329, metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch", metadata !39} ; [ DW_TAG_file_type ] +!39 = metadata !{i32 524305, i32 0, i32 1, metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!46 = metadata !{i32 524303, metadata !38, metadata !"", metadata !38, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ] +!47 = metadata !{i32 524324, metadata !38, metadata !"char", metadata !38, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!97 = metadata !{i32 524334, i32 0, metadata !38, metadata !"main", metadata !"main", metadata !"main", metadata !38, i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!98 = metadata !{i32 524309, metadata !38, metadata !"", metadata !38, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !99, i32 0, null} ; [ DW_TAG_subroutine_type ] +!99 = metadata !{metadata !100} +!100 = metadata !{i32 524324, metadata !38, metadata !"int", metadata !38, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!101 = metadata 
!{[2 x i8*]* @C.9.2167} +!102 = metadata !{i32 524544, metadata !103, metadata !"find_strings", metadata !38, i32 75, metadata !104} ; [ DW_TAG_auto_variable ] +!103 = metadata !{i32 524299, metadata !97, i32 73, i32 0} ; [ DW_TAG_lexical_block ] +!104 = metadata !{i32 524289, metadata !38, metadata !"", metadata !38, i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null} ; [ DW_TAG_array_type ] +!105 = metadata !{metadata !106} +!106 = metadata !{i32 524321, i64 0, i64 1332} ; [ DW_TAG_subrange_type ] +!107 = metadata !{i32 73, i32 0, metadata !103, null} + +define i32 @main() nounwind ssp { +bb.nph: + tail call void @llvm.dbg.declare(metadata !101, metadata !102), !dbg !107 + ret i32 0, !dbg !107 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + diff --git a/test/CodeGen/X86/2010-07-06-asm-RIP.ll b/test/CodeGen/X86/2010-07-06-asm-RIP.ll new file mode 100644 index 000000000000..f646afaa266a --- /dev/null +++ b/test/CodeGen/X86/2010-07-06-asm-RIP.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; PR 7528 + +@n = global i32 0 ; <i32*> [#uses=2] + +define void @f(i32*) nounwind ssp { + ret void +} + +define void @g() nounwind ssp { +entry: +; CHECK: _g: +; CHECK: push $_f$_f +; CHECK: call _f(%rip) + call void asm sideeffect "push\09$1$1\0A\09call\09${1:a}\0A\09pop\09%edx", "imr,i,~{dirflag},~{fpsr},~{flags},~{memory},~{cc},~{edi},~{esi},~{edx},~{ecx},~{ebx},~{eax}"(i32* @n, void (i32*)* @f) nounwind + br label %return + +return: ; preds = %entry + ret void +} + diff --git a/test/CodeGen/X86/alloca-align-rounding-32.ll b/test/CodeGen/X86/alloca-align-rounding-32.ll new file mode 100644 index 000000000000..c0f1a18123e6 --- /dev/null +++ b/test/CodeGen/X86/alloca-align-rounding-32.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1 + +declare void @bar(<2 x i64>* %n) + +define void @foo(i32 %h) { + %p = alloca <2 x i64>, i32 %h + call void 
@bar(<2 x i64>* %p) + ret void +} + +define void @foo2(i32 %h) { + %p = alloca <2 x i64>, i32 %h, align 32 + call void @bar(<2 x i64>* %p) + ret void +} diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index f45e9b84b264..3c87dbf2bd78 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -1,16 +1,15 @@ -; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1 ; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1 declare void @bar(<2 x i64>* %n) -define void @foo(i32 %h) { - %p = alloca <2 x i64>, i32 %h +define void @foo(i64 %h) { + %p = alloca <2 x i64>, i64 %h call void @bar(<2 x i64>* %p) ret void } -define void @foo2(i32 %h) { - %p = alloca <2 x i64>, i32 %h, align 32 +define void @foo2(i64 %h) { + %p = alloca <2 x i64>, i64 %h, align 32 call void @bar(<2 x i64>* %p) ret void } diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll index acc0647bc87d..094cbc7bdefc 100644 --- a/test/CodeGen/X86/break-sse-dep.ll +++ b/test/CodeGen/X86/break-sse-dep.ll @@ -4,7 +4,7 @@ define double @t1(float* nocapture %x) nounwind readonly ssp { entry: ; CHECK: t1: ; CHECK: movss (%rdi), %xmm0 -; CHECK; cvtss2sd %xmm0, %xmm0 +; CHECK: cvtss2sd %xmm0, %xmm0 %0 = load float* %x, align 4 %1 = fpext float %0 to double @@ -14,8 +14,49 @@ entry: define float @t2(double* nocapture %x) nounwind readonly ssp optsize { entry: ; CHECK: t2: -; CHECK; cvtsd2ss (%rdi), %xmm0 +; CHECK: cvtsd2ss (%rdi), %xmm0 %0 = load double* %x, align 8 %1 = fptrunc double %0 to float ret float %1 } + +define float @squirtf(float* %x) nounwind { +entry: +; CHECK: squirtf: +; CHECK: movss (%rdi), %xmm0 +; CHECK: sqrtss %xmm0, %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt(double* %x) nounwind { +entry: +; CHECK: squirt: +; CHECK: movsd (%rdi), %xmm0 +; CHECK: sqrtsd %xmm0, %xmm0 + 
%z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +define float @squirtf_size(float* %x) nounwind optsize { +entry: +; CHECK: squirtf_size: +; CHECK: sqrtss (%rdi), %xmm0 + %z = load float* %x + %t = call float @llvm.sqrt.f32(float %z) + ret float %t +} + +define double @squirt_size(double* %x) nounwind optsize { +entry: +; CHECK: squirt_size: +; CHECK: sqrtsd (%rdi), %xmm0 + %z = load double* %x + %t = call double @llvm.sqrt.f64(double %z) + ret double %t +} + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) diff --git a/test/CodeGen/X86/crash-O0.ll b/test/CodeGen/X86/crash-O0.ll new file mode 100644 index 000000000000..956d43b4e895 --- /dev/null +++ b/test/CodeGen/X86/crash-O0.ll @@ -0,0 +1,31 @@ +; RUN: llc -O0 -relocation-model=pic -disable-fp-elim < %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10" + +; This file contains functions that may crash llc -O0 + +; The DIV8 instruction produces results in AH and AL, but we don't want to use +; AH in 64-bit mode. The hack used must not generate copyFromReg nodes for +; aliased registers (AX and AL) - RegAllocFast does not like that. 
+; PR7312 +define i32 @div8() nounwind { +entry: + %0 = trunc i64 undef to i8 ; <i8> [#uses=3] + %1 = udiv i8 0, %0 ; <i8> [#uses=1] + %2 = urem i8 0, %0 ; <i8> [#uses=1] + %3 = icmp uge i8 %2, %0 ; <i1> [#uses=1] + br i1 %3, label %"40", label %"39" + +"39": ; preds = %"36" + %4 = zext i8 %1 to i32 ; <i32> [#uses=1] + %5 = mul nsw i32 %4, undef ; <i32> [#uses=1] + %6 = add nsw i32 %5, undef ; <i32> [#uses=1] + %7 = icmp ne i32 %6, undef ; <i1> [#uses=1] + br i1 %7, label %"40", label %"41" + +"40": ; preds = %"39", %"36" + unreachable + +"41": ; preds = %"39" + unreachable +} diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index 2f27f35f0acd..a14a48baa355 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -130,3 +130,14 @@ bb14: bb67: ret void } + +; Crash when trying to copy AH to AL. +; PR7540 +define void @copy8bitregs() nounwind { +entry: + %div.i = sdiv i32 115200, 0 + %shr8.i = lshr i32 %div.i, 8 + %conv4.i = trunc i32 %shr8.i to i8 + call void asm sideeffect "outb $0, ${1:w}", "{ax},N{dx},~{dirflag},~{fpsr},~{flags}"(i8 %conv4.i, i32 1017) nounwind + unreachable +} diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll index f2696ce814da..8d7dc8f9a7f8 100644 --- a/test/CodeGen/X86/fast-isel-bc.ll +++ b/test/CodeGen/X86/fast-isel-bc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx | FileCheck %s ; PR4684 target datalayout = diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll index 5b8acecc3c18..1270ab78ab5f 100644 --- a/test/CodeGen/X86/fast-isel-gep.ll +++ b/test/CodeGen/X86/fast-isel-gep.ll @@ -51,3 +51,22 @@ entry: ; X64: ret } + +define double @test4(i64 %x, double* %p) nounwind { +entry: + %x.addr = alloca i64, align 8 ; <i64*> [#uses=2] + %p.addr = alloca double*, align 8 ; <double**> [#uses=2] + store i64 %x, i64* %x.addr + store double* %p, double** 
%p.addr + %tmp = load i64* %x.addr ; <i64> [#uses=1] + %add = add nsw i64 %tmp, 16 ; <i64> [#uses=1] + %tmp1 = load double** %p.addr ; <double*> [#uses=1] + %arrayidx = getelementptr inbounds double* %tmp1, i64 %add ; <double*> [#uses=1] + %tmp2 = load double* %arrayidx ; <double> [#uses=1] + ret double %tmp2 + +; X32: test4: +; X32: 128(%e{{.*}},%e{{.*}},8) +; X64: test4: +; X64: 128(%r{{.*}},%r{{.*}},8) +} diff --git a/test/CodeGen/X86/fast-isel-loads.ll b/test/CodeGen/X86/fast-isel-loads.ll new file mode 100644 index 000000000000..2fbb46c0b9f5 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-loads.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=x86-64 -O0 -asm-verbose=false < %s | FileCheck %s + +; Fast-isel shouldn't reload the argument values from the stack. + +; CHECK: foo: +; CHECK-NEXT: movq %rdi, -8(%rsp) +; CHECK-NEXT: movq %rsi, -16(%rsp) +; CHECK-NEXT: movsd 128(%rsi,%rdi,8), %xmm0 +; CHECK-NEXT: ret + +define double @foo(i64 %x, double* %p) nounwind { +entry: + %x.addr = alloca i64, align 8 ; <i64*> [#uses=2] + %p.addr = alloca double*, align 8 ; <double**> [#uses=2] + store i64 %x, i64* %x.addr + store double* %p, double** %p.addr + %tmp = load i64* %x.addr ; <i64> [#uses=1] + %tmp1 = load double** %p.addr ; <double*> [#uses=1] + %add = add nsw i64 %tmp, 16 ; <i64> [#uses=1] + %arrayidx = getelementptr inbounds double* %tmp1, i64 %add ; <double*> [#uses=1] + %tmp2 = load double* %arrayidx ; <double> [#uses=1] + ret double %tmp2 +} diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll index 35f7a72a285c..7759bb056892 100644 --- a/test/CodeGen/X86/fast-isel-shift-imm.ll +++ b/test/CodeGen/X86/fast-isel-shift-imm.ll @@ -1,7 +1,8 @@ ; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax} ; PR3242 -define i32 @foo(i32 %x) nounwind { +define void @foo(i32 %x, i32* %p) nounwind { %y = ashr i32 %x, 50000 - ret i32 %y + store i32 %y, i32* %p + ret void } diff --git a/test/CodeGen/X86/fast-isel-x86.ll 
b/test/CodeGen/X86/fast-isel-x86.ll new file mode 100644 index 000000000000..56aeb3a34364 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-x86.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=x86 -relocation-model=pic < %s + +; This should use flds to set the return value. +; CHECK: test0: +; CHECK: flds +; CHECK: ret +@G = external global float +define float @test0() nounwind { + %t = load float* @G + ret float %t +} + +; This should pop 4 bytes on return. +; CHECK: test1: +; CHECK: ret $4 +define void @test1({i32, i32, i32, i32}* sret %p) nounwind { + store {i32, i32, i32, i32} zeroinitializer, {i32, i32, i32, i32}* %p + ret void +} + +; Properly initialize the pic base. +; CHECK: test2: +; CHECK-NOT: HHH +; CHECK: call{{.*}}L2$pb +; CHECK-NEXT: L2$pb: +; CHECK-NEXT: pop +; CHECK: HHH +; CHECK: ret +@HHH = external global i32 +define i32 @test2() nounwind { + %t = load i32* @HHH + ret i32 %t +} diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll index 3d26ae7018b5..177c06b45dcd 100644 --- a/test/CodeGen/X86/fast-isel.ll +++ b/test/CodeGen/X86/fast-isel.ll @@ -49,9 +49,10 @@ entry: ret i32 %tmp2 } -define i1 @ptrtoint_i1(i8* %p) nounwind { +define void @ptrtoint_i1(i8* %p, i1* %q) nounwind { %t = ptrtoint i8* %p to i1 - ret i1 %t + store i1 %t, i1* %q + ret void } define i8* @inttoptr_i1(i1 %p) nounwind { %t = inttoptr i1 %p to i8* @@ -86,11 +87,8 @@ define i8 @mul_i8(i8 %a) nounwind { ret i8 %tmp } -define void @store_i1(i1* %p, i1 %t) nounwind { - store i1 %t, i1* %p - ret void -} -define i1 @load_i1(i1* %p) nounwind { +define void @load_store_i1(i1* %p, i1* %q) nounwind { %t = load i1* %p - ret i1 %t + store i1 %t, i1* %q + ret void } diff --git a/test/CodeGen/X86/fp-stack-O0-crash.ll b/test/CodeGen/X86/fp-stack-O0-crash.ll index bbadca5b861c..9b629c08652c 100644 --- a/test/CodeGen/X86/fp-stack-O0-crash.ll +++ b/test/CodeGen/X86/fp-stack-O0-crash.ll @@ -1,4 +1,3 @@ -; RUN: llc %s -O0 -fast-isel -regalloc=local -o - ; RUN: llc %s -O0 -fast-isel 
-regalloc=fast -o - ; PR4767 diff --git a/test/CodeGen/X86/hidden-vis-5.ll b/test/CodeGen/X86/hidden-vis-pic.ll index 88fae37a1687..ba130a2c1c86 100644 --- a/test/CodeGen/X86/hidden-vis-5.ll +++ b/test/CodeGen/X86/hidden-vis-pic.ll @@ -1,4 +1,27 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s + + + +; PR7353 PR7334 rdar://8072315 rdar://8018308 + +define available_externally hidden +void @_ZNSbIcED1Ev() nounwind readnone ssp align 2 { +entry: + ret void +} + +define void()* @test1() nounwind { +entry: + ret void()* @_ZNSbIcED1Ev +} + +; This must use movl of the stub, not an lea, since the function isn't being +; emitted here. +; CHECK: movl L__ZNSbIcED1Ev$non_lazy_ptr-L1$pb( + + + + ; <rdar://problem/7383328> @.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] @@ -28,3 +51,5 @@ return: ; preds = %entry ; CHECK: .private_extern _func.eh ; CHECK: .private_extern _main.eh + + diff --git a/test/CodeGen/X86/imp-def-copies.ll b/test/CodeGen/X86/imp-def-copies.ll deleted file mode 100644 index 91178403876f..000000000000 --- a/test/CodeGen/X86/imp-def-copies.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: llc < %s -march=x86 | not grep mov - - %struct.active_line = type { %struct.gs_fixed_point, %struct.gs_fixed_point, i32, i32, i32, %struct.line_segment*, i32, i16, i16, %struct.active_line*, %struct.active_line* } - %struct.gs_fixed_point = type { i32, i32 } - %struct.line_list = type { %struct.active_line*, i32, i16, %struct.active_line*, %struct.active_line*, %struct.active_line*, %struct.active_line, i32 } - %struct.line_segment = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point } - %struct.subpath = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point, %struct.line_segment*, i32, i32, i8 } - -define fastcc void @add_y_list(%struct.subpath* %ppath.0.4.val, i16 signext %tag, %struct.line_list* %ll, i32 
%pbox.0.0.1.val, i32 %pbox.0.1.0.val, i32 %pbox.0.1.1.val) nounwind { -entry: - br i1 false, label %return, label %bb -bb: ; preds = %bb280, %entry - %psub.1.reg2mem.0 = phi %struct.subpath* [ %psub.0.reg2mem.0, %bb280 ], [ undef, %entry ] ; <%struct.subpath*> [#uses=1] - %plast.1.reg2mem.0 = phi %struct.line_segment* [ %plast.0.reg2mem.0, %bb280 ], [ undef, %entry ] ; <%struct.line_segment*> [#uses=1] - %prev_dir.0.reg2mem.0 = phi i32 [ %dir.0.reg2mem.0, %bb280 ], [ undef, %entry ] ; <i32> [#uses=1] - br i1 false, label %bb280, label %bb109 -bb109: ; preds = %bb - %tmp113 = icmp sgt i32 0, %prev_dir.0.reg2mem.0 ; <i1> [#uses=1] - br i1 %tmp113, label %bb116, label %bb280 -bb116: ; preds = %bb109 - ret void -bb280: ; preds = %bb109, %bb - %psub.0.reg2mem.0 = phi %struct.subpath* [ null, %bb ], [ %psub.1.reg2mem.0, %bb109 ] ; <%struct.subpath*> [#uses=1] - %plast.0.reg2mem.0 = phi %struct.line_segment* [ null, %bb ], [ %plast.1.reg2mem.0, %bb109 ] ; <%struct.line_segment*> [#uses=1] - %dir.0.reg2mem.0 = phi i32 [ 0, %bb ], [ 0, %bb109 ] ; <i32> [#uses=1] - br i1 false, label %return, label %bb -return: ; preds = %bb280, %entry - ret void -} diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll index 09b09295153e..6348fcaf7a07 100644 --- a/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/test/CodeGen/X86/inline-asm-fpstack.ll @@ -1,42 +1,87 @@ -; RUN: llc < %s -march=x86 +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; There should be no stack manipulations between the inline asm and ret. +; CHECK: test1 +; CHECK: InlineAsm End +; CHECK-NEXT: ret define x86_fp80 @test1() { %tmp85 = call x86_fp80 asm sideeffect "fld0", "={st(0)}"() ret x86_fp80 %tmp85 } +; CHECK: test2 +; CHECK: InlineAsm End +; CHECK-NEXT: ret define double @test2() { %tmp85 = call double asm sideeffect "fld0", "={st(0)}"() ret double %tmp85 } +; Setting up argument in st(0) should be a single fld. 
+; CHECK: test3 +; CHECK: fld +; CHECK-NEXT: InlineAsm Start +; Asm consumes stack, nothing should be popped. +; CHECK: InlineAsm End +; CHECK-NOT: fstp +; CHECK: ret define void @test3(x86_fp80 %X) { call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X) ret void } +; CHECK: test4 +; CHECK: fld +; CHECK-NEXT: InlineAsm Start +; CHECK: InlineAsm End +; CHECK-NOT: fstp +; CHECK: ret define void @test4(double %X) { call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %X) ret void } +; Same as test3/4, but using value from fadd. +; The fadd can be done in xmm or x87 regs - we don't test that. +; CHECK: test5 +; CHECK: InlineAsm End +; CHECK-NOT: fstp +; CHECK: ret define void @test5(double %X) { %Y = fadd double %X, 123.0 call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %Y) ret void } +; CHECK: test6 define void @test6(double %A, double %B, double %C, double %D, double %E) nounwind { entry: - ; Uses the same value twice, should have one fstp after the asm. +; Uses the same value twice, should have one fstp after the asm. +; CHECK: foo +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NOT: fstp tail call void asm sideeffect "foo $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %A, double %A ) nounwind - ; Uses two different values, should be in st(0)/st(1) and both be popped. +; Uses two different values, should be in st(0)/st(1) and both be popped. +; CHECK: bar +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NEXT: fstp tail call void asm sideeffect "bar $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %B, double %C ) nounwind - ; Uses two different values, one of which isn't killed in this asm, it - ; should not be popped after the asm. +; Uses two different values, one of which isn't killed in this asm, it +; should not be popped after the asm. 
+; CHECK: baz +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NOT: fstp tail call void asm sideeffect "baz $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %D, double %E ) nounwind - ; This is the last use of %D, so it should be popped after. +; This is the last use of %D, so it should be popped after. +; CHECK: baz +; CHECK: InlineAsm End +; CHECK-NEXT: fstp +; CHECK-NOT: fstp +; CHECK: ret tail call void asm sideeffect "baz $0", "f,~{dirflag},~{fpsr},~{flags}"( double %D ) nounwind ret void } diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll index ffa6ee6e019e..78037e0423a5 100644 --- a/test/CodeGen/X86/inline-asm-fpstack2.ll +++ b/test/CodeGen/X86/inline-asm-fpstack2.ll @@ -1,10 +1,21 @@ -; RUN: llc < %s -march=x86 > %t -; RUN: grep {fld %%st(0)} %t +; RUN: llc < %s -march=x86 | FileCheck %s ; PR4185 +; Passing a non-killed value to asm in {st}. +; Make sure it is duped before. +; asm kills st(0), so we shouldn't pop anything +; CHECK: fld %st(0) +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: fistpl +; CHECK-NOT: fstp +; CHECK: ret define void @test() { return: call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06) call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06) ret void } + +; A valid alternative would be to remat the constant pool load before each +; inline asm. diff --git a/test/CodeGen/X86/inline-asm-fpstack3.ll b/test/CodeGen/X86/inline-asm-fpstack3.ll index 17945fe4149e..a609681c4923 100644 --- a/test/CodeGen/X86/inline-asm-fpstack3.ll +++ b/test/CodeGen/X86/inline-asm-fpstack3.ll @@ -1,11 +1,14 @@ -; RUN: llc < %s -march=x86 > %t -; RUN: grep {fld %%st(0)} %t +; RUN: llc < %s -march=x86 | FileCheck %s ; PR4459 -declare x86_fp80 @ceil(x86_fp80) - -declare void @test(x86_fp80) - +; The return value from ceil must be duped before being consumed by asm. 
+; CHECK: ceil +; CHECK: fld %st(0) +; CHECK-NOT: fxch +; CHECK: fistpl +; CHECK-NOT: fxch +; CHECK: fstpt +; CHECK: test define void @test2(x86_fp80 %a) { entry: %0 = call x86_fp80 @ceil(x86_fp80 %a) @@ -13,3 +16,5 @@ entry: call void @test(x86_fp80 %0 ) ret void } +declare x86_fp80 @ceil(x86_fp80) +declare void @test(x86_fp80) diff --git a/test/CodeGen/X86/inline-asm-fpstack4.ll b/test/CodeGen/X86/inline-asm-fpstack4.ll index bae2970db4ab..ec572b45238a 100644 --- a/test/CodeGen/X86/inline-asm-fpstack4.ll +++ b/test/CodeGen/X86/inline-asm-fpstack4.ll @@ -1,10 +1,17 @@ -; RUN: llc < %s -march=x86 +; RUN: llc < %s -march=x86 | FileCheck %s ; PR4484 -declare x86_fp80 @ceil() - -declare void @test(x86_fp80) - +; ceil leaves a value on the stack that is needed after the asm. +; CHECK: ceil +; CHECK-NOT: fstp +; Load %a from stack after ceil +; CHECK: fldt +; CHECK-NOT: fxch +; CHECK: fistpl +; CHECK-NOT: fstp +; Set up call to test. +; CHECK: fstpt +; CHECK: test define void @test2(x86_fp80 %a) { entry: %0 = call x86_fp80 @ceil() @@ -13,3 +20,5 @@ entry: ret void } +declare x86_fp80 @ceil() +declare void @test(x86_fp80) diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll index 1f4a13f54b75..79b688551eb9 100644 --- a/test/CodeGen/X86/inline-asm-tied.ll +++ b/test/CodeGen/X86/inline-asm-tied.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 | grep {movl %edx, 12(%esp)} | count 2 +; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=linearscan | grep {movl %edx, 4(%esp)} | count 2 ; rdar://6992609 target triple = "i386-apple-darwin9.0" diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll index 8c1c40976605..63881e0ccb57 100644 --- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll +++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -39,8 +39,7 @@ bb650: ; preds = %bb650, %bb428 %tmp659 = icmp eq i8 %tmp658, 0 ; <i1> [#uses=1] br i1 %tmp659, label %bb650, label %bb662 bb662: ; 
preds = %bb650 - %tmp685 = icmp eq %struct.rec* null, null ; <i1> [#uses=1] - br i1 %tmp685, label %bb761, label %bb688 + br label %bb761 bb688: ; preds = %bb662 ret void bb761: ; preds = %bb662 diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index 408fb20b8d89..8385a29fa22b 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86-64 -o %t ; RUN: not grep inc %t ; RUN: grep dec %t | count 2 -; RUN: grep addq %t | count 13 +; RUN: grep addq %t | count 12 ; RUN: not grep addb %t ; RUN: not grep leaq %t ; RUN: not grep leal %t diff --git a/test/CodeGen/X86/leaf-fp-elim.ll b/test/CodeGen/X86/leaf-fp-elim.ll new file mode 100644 index 000000000000..607dc72e2fa3 --- /dev/null +++ b/test/CodeGen/X86/leaf-fp-elim.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -disable-non-leaf-fp-elim -relocation-model=pic -mtriple=x86_64-apple-darwin | FileCheck %s +; <rdar://problem/8170192> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11.0" + +@msg = internal global i8* null ; <i8**> [#uses=1] +@.str = private constant [2 x i8] c"x\00", align 1 ; <[2 x i8]*> [#uses=1] + +define void @test(i8* %p) nounwind optsize ssp { + +; No stack frame, please. 
+; CHECK: _test +; CHECK-NOT: pushq %rbp +; CHECK-NOT: movq %rsp, %rbp +; CHECK: InlineAsm Start + +entry: + %0 = icmp eq i8* %p, null ; <i1> [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %entry + tail call void asm "mov $1, $0", "=*m,{cx},~{dirflag},~{fpsr},~{flags}"(i8** @msg, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind + tail call void @llvm.trap() + unreachable + +return: ; preds = %entry + ret void +} + +declare void @llvm.trap() nounwind diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll new file mode 100644 index 000000000000..71685bb5b83a --- /dev/null +++ b/test/CodeGen/X86/licm-nested.ll @@ -0,0 +1,89 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -stats -info-output-file - | grep machine-licm | grep 2 + +; MachineLICM should be able to hoist the symbolic addresses out of +; the inner loops. + +@main.flags = internal global [8193 x i8] zeroinitializer, align 16 ; <[8193 x i8]*> [#uses=3] +@.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + %cmp = icmp eq i32 %argc, 2 ; <i1> [#uses=1] + br i1 %cmp, label %while.cond.preheader, label %bb.nph53 + +while.cond.preheader: ; preds = %entry + %arrayidx = getelementptr inbounds i8** %argv, i64 1 ; <i8**> [#uses=1] + %tmp2 = load i8** %arrayidx ; <i8*> [#uses=1] + %call = tail call i32 @atoi(i8* %tmp2) nounwind ; <i32> [#uses=2] + %tobool51 = icmp eq i32 %call, 0 ; <i1> [#uses=1] + br i1 %tobool51, label %while.end, label %bb.nph53 + +while.cond.loopexit: ; preds = %for.inc35 + %indvar.next77 = add i32 %indvar76, 1 ; <i32> [#uses=2] + %exitcond78 = icmp eq i32 %indvar.next77, %NUM.0.ph80 ; <i1> [#uses=1] + br i1 %exitcond78, label %while.end, label %bb.nph + +bb.nph53: ; preds = %entry, %while.cond.preheader + %NUM.0.ph80 = phi i32 [ %call, %while.cond.preheader ], [ 17000, %entry ] ; <i32> [#uses=1] + br label %bb.nph + 
+bb.nph: ; preds = %while.cond.loopexit, %bb.nph53 + %indvar76 = phi i32 [ 0, %bb.nph53 ], [ %indvar.next77, %while.cond.loopexit ] ; <i32> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; <i64> [#uses=2] + %tmp = add i64 %indvar, 2 ; <i64> [#uses=1] + %arrayidx10 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp ; <i8*> [#uses=1] + store i8 1, i8* %arrayidx10 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 8191 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body15, label %for.body + +for.body15: ; preds = %for.body, %for.inc35 + %indvar57 = phi i64 [ %indvar.next58, %for.inc35 ], [ 0, %for.body ] ; <i64> [#uses=4] + %count.248 = phi i32 [ %count.1, %for.inc35 ], [ 0, %for.body ] ; <i32> [#uses=2] + %tmp68 = add i64 %indvar57, 2 ; <i64> [#uses=2] + %tmp70 = mul i64 %indvar57, 3 ; <i64> [#uses=1] + %tmp71 = add i64 %tmp70, 6 ; <i64> [#uses=1] + %tmp73 = shl i64 %indvar57, 1 ; <i64> [#uses=1] + %add = add i64 %tmp73, 4 ; <i64> [#uses=2] + %arrayidx17 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1] + %tmp18 = load i8* %arrayidx17 ; <i8> [#uses=1] + %tobool19 = icmp eq i8 %tmp18, 0 ; <i1> [#uses=1] + br i1 %tobool19, label %for.inc35, label %if.then + +if.then: ; preds = %for.body15 + %cmp2443 = icmp slt i64 %add, 8193 ; <i1> [#uses=1] + br i1 %cmp2443, label %for.body25, label %for.end32 + +for.body25: ; preds = %if.then, %for.body25 + %indvar55 = phi i64 [ %indvar.next56, %for.body25 ], [ 0, %if.then ] ; <i64> [#uses=2] + %tmp60 = mul i64 %tmp68, %indvar55 ; <i64> [#uses=2] + %tmp75 = add i64 %add, %tmp60 ; <i64> [#uses=1] + %arrayidx27 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp75 ; <i8*> [#uses=1] + store i8 0, i8* %arrayidx27 + %add31 = add i64 %tmp71, %tmp60 ; <i64> [#uses=1] + %cmp24 = icmp slt i64 %add31, 8193 ; <i1> [#uses=1] + %indvar.next56 = add i64 %indvar55, 1 ; <i64> 
[#uses=1] + br i1 %cmp24, label %for.body25, label %for.end32 + +for.end32: ; preds = %for.body25, %if.then + %inc34 = add nsw i32 %count.248, 1 ; <i32> [#uses=1] + br label %for.inc35 + +for.inc35: ; preds = %for.body15, %for.end32 + %count.1 = phi i32 [ %inc34, %for.end32 ], [ %count.248, %for.body15 ] ; <i32> [#uses=2] + %indvar.next58 = add i64 %indvar57, 1 ; <i64> [#uses=2] + %exitcond67 = icmp eq i64 %indvar.next58, 8191 ; <i1> [#uses=1] + br i1 %exitcond67, label %while.cond.loopexit, label %for.body15 + +while.end: ; preds = %while.cond.loopexit, %while.cond.preheader + %count.0.lcssa = phi i32 [ 0, %while.cond.preheader ], [ %count.1, %while.cond.loopexit ] ; <i32> [#uses=1] + %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @atoi(i8* nocapture) nounwind readonly + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/X86/liveness-local-regalloc.ll b/test/CodeGen/X86/liveness-local-regalloc.ll index 8cac3f830351..b469d0837dc5 100644 --- a/test/CodeGen/X86/liveness-local-regalloc.ll +++ b/test/CodeGen/X86/liveness-local-regalloc.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -O3 -regalloc=local -mtriple=x86_64-apple-darwin10 ; RUN: llc < %s -O3 -regalloc=fast -mtriple=x86_64-apple-darwin10 ; <rdar://problem/7755473> diff --git a/test/CodeGen/X86/local-liveness.ll b/test/CodeGen/X86/local-liveness.ll deleted file mode 100644 index 321f208e75ca..000000000000 --- a/test/CodeGen/X86/local-liveness.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc < %s -march=x86 -regalloc=local | grep {subl %eax, %edx} - -; Local regalloc shouldn't assume that both the uses of the -; sub instruction are kills, because one of them is tied -; to an output. Previously, it was allocating both inputs -; in the same register. 
- -define i32 @func_3() nounwind { -entry: - %retval = alloca i32 ; <i32*> [#uses=2] - %g_323 = alloca i8 ; <i8*> [#uses=2] - %p_5 = alloca i64, align 8 ; <i64*> [#uses=2] - %0 = alloca i32 ; <i32*> [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i64 0, i64* %p_5, align 8 - store i8 1, i8* %g_323, align 1 - %1 = load i8* %g_323, align 1 ; <i8> [#uses=1] - %2 = sext i8 %1 to i64 ; <i64> [#uses=1] - %3 = load i64* %p_5, align 8 ; <i64> [#uses=1] - %4 = sub i64 %3, %2 ; <i64> [#uses=1] - %5 = icmp sge i64 %4, 0 ; <i1> [#uses=1] - %6 = zext i1 %5 to i32 ; <i32> [#uses=1] - store i32 %6, i32* %0, align 4 - %7 = load i32* %0, align 4 ; <i32> [#uses=1] - store i32 %7, i32* %retval, align 4 - br label %return - -return: ; preds = %entry - %retval1 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval1 -} diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll index bbafcf7cbc7f..919f836841ff 100644 --- a/test/CodeGen/X86/loop-strength-reduce6.ll +++ b/test/CodeGen/X86/loop-strength-reduce6.ll @@ -2,22 +2,22 @@ define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind { entry: - br i1 false, label %cond_next191, label %cond_true189 + br label %cond_true189 cond_true189: ; preds = %entry ret i32 0 cond_next191: ; preds = %entry - br i1 false, label %cond_next37.i, label %cond_false.i9 + br label %cond_false.i9 cond_false.i9: ; preds = %cond_next191 ret i32 0 cond_next37.i: ; preds = %cond_next191 - br i1 false, label %cond_false50.i, label %cond_true44.i + br label %cond_true44.i cond_true44.i: ; preds = %cond_next37.i - br i1 false, label %cond_true11.i.i, label %bb414.preheader.i + br label %bb414.preheader.i cond_true11.i.i: ; preds = %cond_true44.i ret i32 0 @@ -26,19 +26,19 @@ cond_false50.i: ; preds = %cond_next37.i ret i32 0 bb414.preheader.i: ; preds = %cond_true44.i - br i1 false, label %bb.i18, label %do_layer3.exit + br label %do_layer3.exit bb.i18: ; preds = %bb414.preheader.i - br i1 
false, label %bb358.i, label %cond_true79.i + br label %cond_true79.i cond_true79.i: ; preds = %bb.i18 ret i32 0 bb331.i: ; preds = %bb358.i, %cond_true.i149.i - br i1 false, label %cond_true.i149.i, label %cond_false.i151.i + br label %cond_false.i151.i cond_true.i149.i: ; preds = %bb331.i - br i1 false, label %bb178.preheader.i.i, label %bb331.i + br label %bb331.i cond_false.i151.i: ; preds = %bb331.i ret i32 0 @@ -56,7 +56,7 @@ bb178.preheader.i.i: ; preds = %bb163.i.i, %cond_true.i149.i br label %bb163.i.i bb358.i: ; preds = %bb.i18 - br i1 false, label %bb331.i, label %bb406.i + br label %bb406.i bb406.i: ; preds = %bb358.i ret i32 0 diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll index 8afbb0d7a36b..8ed97e447fee 100644 --- a/test/CodeGen/X86/lsr-delayed-fold.ll +++ b/test/CodeGen/X86/lsr-delayed-fold.ll @@ -132,3 +132,47 @@ for.inc131: ; preds = %for.body123, %for.b for.end134: ; preds = %for.inc131 ret void } + +; LSR needs to remember inserted instructions even in postinc mode, because +; there could be multiple subexpressions within a single expansion which +; require insert point adjustment. 
+; PR7306 + +define fastcc i32 @GetOptimum() nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %t = phi i32 [ 0, %bb ], [ %t2, %bb1 ] ; <i32> [#uses=1] + %t2 = add i32 %t, undef ; <i32> [#uses=3] + br i1 undef, label %bb1, label %bb3 + +bb3: ; preds = %bb1 + %t4 = add i32 undef, -1 ; <i32> [#uses=1] + br label %bb5 + +bb5: ; preds = %bb16, %bb3 + %t6 = phi i32 [ %t17, %bb16 ], [ 0, %bb3 ] ; <i32> [#uses=3] + %t7 = add i32 undef, %t6 ; <i32> [#uses=2] + %t8 = add i32 %t4, %t6 ; <i32> [#uses=1] + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb5 + br label %bb10 + +bb10: ; preds = %bb9, %bb5 + br i1 undef, label %bb11, label %bb16 + +bb11: ; preds = %bb10 + %t12 = icmp ugt i32 %t7, %t2 ; <i1> [#uses=1] + %t13 = select i1 %t12, i32 %t2, i32 %t7 ; <i32> [#uses=1] + br label %bb14 + +bb14: ; preds = %bb11 + store i32 %t13, i32* null + ret i32 %t8 + +bb16: ; preds = %bb10 + %t17 = add i32 %t6, 1 ; <i32> [#uses=1] + br label %bb5 +} diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index 474450acc9b0..938023ffe037 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -7,7 +7,7 @@ @Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] @Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] -define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp { +define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind { entry: %0 = load i32* %rk, align 4 ; <i32> [#uses=1] %1 = getelementptr i32* %rk, i64 1 ; <i32*> [#uses=1] diff --git a/test/CodeGen/X86/lsr-nonaffine.ll b/test/CodeGen/X86/lsr-nonaffine.ll new file mode 100644 index 000000000000..b0d30641dd2b --- /dev/null +++ b/test/CodeGen/X86/lsr-nonaffine.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; LSR should compute the correct starting values for this loop. 
Note that +; it's not necessarily LSR's job to compute loop exit expressions; that's +; indvars' job. +; CHECK: movl $12 +; CHECK: movl $42 + +define i32 @real_symmetric_eigen(i32 %n) nounwind { +while.body127: ; preds = %while.cond122 + br label %while.cond141 + +while.cond141: ; preds = %while.cond141, %while.body127 + %0 = phi i32 [ 7, %while.body127 ], [ %indvar.next67, %while.cond141 ] ; <i32> [#uses=3] + %indvar.next67 = add i32 %0, 1 ; <i32> [#uses=1] + %t = icmp slt i32 %indvar.next67, %n + br i1 %t, label %if.then171, label %while.cond141 + +if.then171: ; preds = %while.cond141 + %mul150 = mul i32 %0, %0 ; <i32> [#uses=1] + %add174 = add i32 %mul150, %0 ; <i32> [#uses=1] + ret i32 %add174 +} diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll index b80ee0897d89..b7e69b84bf84 100644 --- a/test/CodeGen/X86/lsr-reuse.ll +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -440,3 +440,312 @@ bb5: ; preds = %bb3, %entry %s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1] ret i32 %s.1.lcssa } + +; Two loops here are of particular interest; the one at %bb21, where +; we don't want to leave extra induction variables around, or use an +; lea to compute an exit condition inside the loop: + +; CHECK: test: + +; CHECK: BB10_4: +; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addss %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: mulss (%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: movss %xmm{{.*}}, (%r{{[^,]*}}) +; CHECK-NEXT: addq $4, %r{{.*}} +; CHECK-NEXT: decq %r{{.*}} +; CHECK-NEXT: addq $4, %r{{.*}} +; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: BB10_2: +; CHECK-NEXT: testq %r{{.*}}, %r{{.*}} +; CHECK-NEXT: jle +; CHECK-NEXT: testb $15, %r{{.*}} +; CHECK-NEXT: jne + +; And the one at %bb68, where we want to be sure to use superhero mode: + +; CHECK: BB10_10: +; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps 
%xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps 16(%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps (%r{{[^,]*}}), %xmm{{.*}} +; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}}) +; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}}) +; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}}) +; CHECK-NEXT: movaps %xmm{{.*}}, 48(%r{{[^,]*}}) +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}} +; CHECK-NEXT: addq $64, %r{{.*}} +; CHECK-NEXT: addq $64, %r{{.*}} +; CHECK-NEXT: addq $-16, %r{{.*}} +; CHECK-NEXT: BB10_11: +; CHECK-NEXT: cmpq $15, %r{{.*}} +; CHECK-NEXT: jg + +define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocapture %arg3, float* %arg4, i64 %arg5, i64 %arg6) nounwind { +bb: + %t = alloca float, align 4 ; <float*> [#uses=3] + %t7 = alloca float, align 4 ; <float*> [#uses=2] + %t8 = load float* %arg3 ; <float> [#uses=8] + %t9 = ptrtoint float* %arg to i64 ; <i64> [#uses=1] + %t10 = ptrtoint float* %arg4 to i64 ; <i64> [#uses=1] + %t11 = xor i64 %t10, %t9 ; <i64> [#uses=1] + %t12 = and i64 %t11, 15 ; <i64> [#uses=1] + %t13 = icmp eq i64 %t12, 0 ; <i1> [#uses=1] + %t14 = xor i64 %arg1, 1 ; <i64> [#uses=1] + %t15 = xor i64 %arg5, 1 ; <i64> [#uses=1] + %t16 = or i64 %t15, %t14 ; <i64> [#uses=1] + %t17 = trunc i64 %t16 to i32 ; <i32> [#uses=1] + %t18 = icmp eq i32 %t17, 0 ; <i1> [#uses=1] + br i1 %t18, label %bb19, label %bb213 + +bb19: ; preds = %bb + %t20 = load float* %arg2 ; <float> [#uses=1] + br label %bb21 + +bb21: ; preds = %bb32, %bb19 + %t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ] ; <i64> [#uses=21] + %t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6] + %t24 = sub i64 %arg6, %t22 ; <i64> [#uses=4] + %t25 = getelementptr float* %arg4, i64 %t22 ; <float*> [#uses=4] + %t26 = getelementptr float* %arg, i64 %t22 ; <float*> 
[#uses=3] + %t27 = icmp sgt i64 %t24, 0 ; <i1> [#uses=1] + br i1 %t27, label %bb28, label %bb37 + +bb28: ; preds = %bb21 + %t29 = ptrtoint float* %t25 to i64 ; <i64> [#uses=1] + %t30 = and i64 %t29, 15 ; <i64> [#uses=1] + %t31 = icmp eq i64 %t30, 0 ; <i1> [#uses=1] + br i1 %t31, label %bb37, label %bb32 + +bb32: ; preds = %bb28 + %t33 = load float* %t26 ; <float> [#uses=1] + %t34 = fmul float %t23, %t33 ; <float> [#uses=1] + store float %t34, float* %t25 + %t35 = fadd float %t23, %t8 ; <float> [#uses=1] + %t36 = add i64 %t22, 1 ; <i64> [#uses=1] + br label %bb21 + +bb37: ; preds = %bb28, %bb21 + %t38 = fmul float %t8, 4.000000e+00 ; <float> [#uses=1] + store float %t38, float* %t + %t39 = fmul float %t8, 1.600000e+01 ; <float> [#uses=1] + store float %t39, float* %t7 + %t40 = fmul float %t8, 0.000000e+00 ; <float> [#uses=1] + %t41 = fadd float %t23, %t40 ; <float> [#uses=1] + %t42 = insertelement <4 x float> undef, float %t41, i32 0 ; <<4 x float>> [#uses=1] + %t43 = fadd float %t23, %t8 ; <float> [#uses=1] + %t44 = insertelement <4 x float> %t42, float %t43, i32 1 ; <<4 x float>> [#uses=1] + %t45 = fmul float %t8, 2.000000e+00 ; <float> [#uses=1] + %t46 = fadd float %t23, %t45 ; <float> [#uses=1] + %t47 = insertelement <4 x float> %t44, float %t46, i32 2 ; <<4 x float>> [#uses=1] + %t48 = fmul float %t8, 3.000000e+00 ; <float> [#uses=1] + %t49 = fadd float %t23, %t48 ; <float> [#uses=1] + %t50 = insertelement <4 x float> %t47, float %t49, i32 3 ; <<4 x float>> [#uses=5] + %t51 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=3] + %t52 = fadd <4 x float> %t50, %t51 ; <<4 x float>> [#uses=3] + %t53 = fadd <4 x float> %t52, %t51 ; <<4 x float>> [#uses=3] + %t54 = fadd <4 x float> %t53, %t51 ; <<4 x float>> [#uses=2] + %t55 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t7) nounwind ; <<4 x float>> [#uses=8] + %t56 = 
icmp sgt i64 %t24, 15 ; <i1> [#uses=2] + br i1 %t13, label %bb57, label %bb118 + +bb57: ; preds = %bb37 + br i1 %t56, label %bb61, label %bb112 + +bb58: ; preds = %bb68 + %t59 = getelementptr float* %arg, i64 %t78 ; <float*> [#uses=1] + %t60 = getelementptr float* %arg4, i64 %t78 ; <float*> [#uses=1] + br label %bb112 + +bb61: ; preds = %bb57 + %t62 = add i64 %t22, 16 ; <i64> [#uses=1] + %t63 = add i64 %t22, 4 ; <i64> [#uses=1] + %t64 = add i64 %t22, 8 ; <i64> [#uses=1] + %t65 = add i64 %t22, 12 ; <i64> [#uses=1] + %t66 = add i64 %arg6, -16 ; <i64> [#uses=1] + %t67 = sub i64 %t66, %t22 ; <i64> [#uses=1] + br label %bb68 + +bb68: ; preds = %bb68, %bb61 + %t69 = phi i64 [ 0, %bb61 ], [ %t111, %bb68 ] ; <i64> [#uses=3] + %t70 = phi <4 x float> [ %t54, %bb61 ], [ %t107, %bb68 ] ; <<4 x float>> [#uses=2] + %t71 = phi <4 x float> [ %t50, %bb61 ], [ %t103, %bb68 ] ; <<4 x float>> [#uses=2] + %t72 = phi <4 x float> [ %t53, %bb61 ], [ %t108, %bb68 ] ; <<4 x float>> [#uses=2] + %t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> [#uses=2] + %t74 = shl i64 %t69, 4 ; <i64> [#uses=5] + %t75 = add i64 %t22, %t74 ; <i64> [#uses=2] + %t76 = getelementptr float* %arg, i64 %t75 ; <float*> [#uses=1] + %t77 = bitcast float* %t76 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t78 = add i64 %t62, %t74 ; <i64> [#uses=2] + %t79 = add i64 %t63, %t74 ; <i64> [#uses=2] + %t80 = getelementptr float* %arg, i64 %t79 ; <float*> [#uses=1] + %t81 = bitcast float* %t80 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t82 = add i64 %t64, %t74 ; <i64> [#uses=2] + %t83 = getelementptr float* %arg, i64 %t82 ; <float*> [#uses=1] + %t84 = bitcast float* %t83 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t85 = add i64 %t65, %t74 ; <i64> [#uses=2] + %t86 = getelementptr float* %arg, i64 %t85 ; <float*> [#uses=1] + %t87 = bitcast float* %t86 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t88 = getelementptr float* %arg4, i64 %t75 ; <float*> [#uses=1] + %t89 = bitcast float* %t88 to <4 x 
float>* ; <<4 x float>*> [#uses=1] + %t90 = getelementptr float* %arg4, i64 %t79 ; <float*> [#uses=1] + %t91 = bitcast float* %t90 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t92 = getelementptr float* %arg4, i64 %t82 ; <float*> [#uses=1] + %t93 = bitcast float* %t92 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t94 = getelementptr float* %arg4, i64 %t85 ; <float*> [#uses=1] + %t95 = bitcast float* %t94 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t96 = mul i64 %t69, -16 ; <i64> [#uses=1] + %t97 = add i64 %t67, %t96 ; <i64> [#uses=2] + %t98 = load <4 x float>* %t77 ; <<4 x float>> [#uses=1] + %t99 = load <4 x float>* %t81 ; <<4 x float>> [#uses=1] + %t100 = load <4 x float>* %t84 ; <<4 x float>> [#uses=1] + %t101 = load <4 x float>* %t87 ; <<4 x float>> [#uses=1] + %t102 = fmul <4 x float> %t98, %t71 ; <<4 x float>> [#uses=1] + %t103 = fadd <4 x float> %t71, %t55 ; <<4 x float>> [#uses=2] + %t104 = fmul <4 x float> %t99, %t73 ; <<4 x float>> [#uses=1] + %t105 = fmul <4 x float> %t100, %t72 ; <<4 x float>> [#uses=1] + %t106 = fmul <4 x float> %t101, %t70 ; <<4 x float>> [#uses=1] + store <4 x float> %t102, <4 x float>* %t89 + store <4 x float> %t104, <4 x float>* %t91 + store <4 x float> %t105, <4 x float>* %t93 + store <4 x float> %t106, <4 x float>* %t95 + %t107 = fadd <4 x float> %t70, %t55 ; <<4 x float>> [#uses=1] + %t108 = fadd <4 x float> %t72, %t55 ; <<4 x float>> [#uses=1] + %t109 = fadd <4 x float> %t73, %t55 ; <<4 x float>> [#uses=1] + %t110 = icmp sgt i64 %t97, 15 ; <i1> [#uses=1] + %t111 = add i64 %t69, 1 ; <i64> [#uses=1] + br i1 %t110, label %bb68, label %bb58 + +bb112: ; preds = %bb58, %bb57 + %t113 = phi float* [ %t59, %bb58 ], [ %t26, %bb57 ] ; <float*> [#uses=1] + %t114 = phi float* [ %t60, %bb58 ], [ %t25, %bb57 ] ; <float*> [#uses=1] + %t115 = phi <4 x float> [ %t103, %bb58 ], [ %t50, %bb57 ] ; <<4 x float>> [#uses=1] + %t116 = phi i64 [ %t97, %bb58 ], [ %t24, %bb57 ] ; <i64> [#uses=1] + %t117 = call <4 x float> asm "movss $1, $0\09\0Apshufd 
$$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=0] + br label %bb194 + +bb118: ; preds = %bb37 + br i1 %t56, label %bb122, label %bb194 + +bb119: ; preds = %bb137 + %t120 = getelementptr float* %arg, i64 %t145 ; <float*> [#uses=1] + %t121 = getelementptr float* %arg4, i64 %t145 ; <float*> [#uses=1] + br label %bb194 + +bb122: ; preds = %bb118 + %t123 = add i64 %t22, -1 ; <i64> [#uses=1] + %t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1] + %t125 = bitcast float* %t124 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t126 = load <4 x float>* %t125 ; <<4 x float>> [#uses=1] + %t127 = add i64 %t22, 16 ; <i64> [#uses=1] + %t128 = add i64 %t22, 3 ; <i64> [#uses=1] + %t129 = add i64 %t22, 7 ; <i64> [#uses=1] + %t130 = add i64 %t22, 11 ; <i64> [#uses=1] + %t131 = add i64 %t22, 15 ; <i64> [#uses=1] + %t132 = add i64 %t22, 4 ; <i64> [#uses=1] + %t133 = add i64 %t22, 8 ; <i64> [#uses=1] + %t134 = add i64 %t22, 12 ; <i64> [#uses=1] + %t135 = add i64 %arg6, -16 ; <i64> [#uses=1] + %t136 = sub i64 %t135, %t22 ; <i64> [#uses=1] + br label %bb137 + +bb137: ; preds = %bb137, %bb122 + %t138 = phi i64 [ 0, %bb122 ], [ %t193, %bb137 ] ; <i64> [#uses=3] + %t139 = phi <4 x float> [ %t54, %bb122 ], [ %t189, %bb137 ] ; <<4 x float>> [#uses=2] + %t140 = phi <4 x float> [ %t50, %bb122 ], [ %t185, %bb137 ] ; <<4 x float>> [#uses=2] + %t141 = phi <4 x float> [ %t53, %bb122 ], [ %t190, %bb137 ] ; <<4 x float>> [#uses=2] + %t142 = phi <4 x float> [ %t52, %bb122 ], [ %t191, %bb137 ] ; <<4 x float>> [#uses=2] + %t143 = phi <4 x float> [ %t126, %bb122 ], [ %t175, %bb137 ] ; <<4 x float>> [#uses=1] + %t144 = shl i64 %t138, 4 ; <i64> [#uses=9] + %t145 = add i64 %t127, %t144 ; <i64> [#uses=2] + %t146 = add i64 %t128, %t144 ; <i64> [#uses=1] + %t147 = getelementptr float* %arg, i64 %t146 ; <float*> [#uses=1] + %t148 = bitcast float* %t147 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t149 = add i64 %t129, %t144 ; <i64> [#uses=1] + 
%t150 = getelementptr float* %arg, i64 %t149 ; <float*> [#uses=1] + %t151 = bitcast float* %t150 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t152 = add i64 %t130, %t144 ; <i64> [#uses=1] + %t153 = getelementptr float* %arg, i64 %t152 ; <float*> [#uses=1] + %t154 = bitcast float* %t153 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t155 = add i64 %t131, %t144 ; <i64> [#uses=1] + %t156 = getelementptr float* %arg, i64 %t155 ; <float*> [#uses=1] + %t157 = bitcast float* %t156 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t158 = add i64 %t22, %t144 ; <i64> [#uses=1] + %t159 = getelementptr float* %arg4, i64 %t158 ; <float*> [#uses=1] + %t160 = bitcast float* %t159 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t161 = add i64 %t132, %t144 ; <i64> [#uses=1] + %t162 = getelementptr float* %arg4, i64 %t161 ; <float*> [#uses=1] + %t163 = bitcast float* %t162 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t164 = add i64 %t133, %t144 ; <i64> [#uses=1] + %t165 = getelementptr float* %arg4, i64 %t164 ; <float*> [#uses=1] + %t166 = bitcast float* %t165 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t167 = add i64 %t134, %t144 ; <i64> [#uses=1] + %t168 = getelementptr float* %arg4, i64 %t167 ; <float*> [#uses=1] + %t169 = bitcast float* %t168 to <4 x float>* ; <<4 x float>*> [#uses=1] + %t170 = mul i64 %t138, -16 ; <i64> [#uses=1] + %t171 = add i64 %t136, %t170 ; <i64> [#uses=2] + %t172 = load <4 x float>* %t148 ; <<4 x float>> [#uses=2] + %t173 = load <4 x float>* %t151 ; <<4 x float>> [#uses=2] + %t174 = load <4 x float>* %t154 ; <<4 x float>> [#uses=2] + %t175 = load <4 x float>* %t157 ; <<4 x float>> [#uses=2] + %t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> 
[#uses=1] + %t179 = shufflevector <4 x float> %t178, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t180 = shufflevector <4 x float> %t173, <4 x float> %t174, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %t181 = shufflevector <4 x float> %t180, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t182 = shufflevector <4 x float> %t174, <4 x float> %t175, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %t183 = shufflevector <4 x float> %t182, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %t184 = fmul <4 x float> %t177, %t140 ; <<4 x float>> [#uses=1] + %t185 = fadd <4 x float> %t140, %t55 ; <<4 x float>> [#uses=2] + %t186 = fmul <4 x float> %t179, %t142 ; <<4 x float>> [#uses=1] + %t187 = fmul <4 x float> %t181, %t141 ; <<4 x float>> [#uses=1] + %t188 = fmul <4 x float> %t183, %t139 ; <<4 x float>> [#uses=1] + store <4 x float> %t184, <4 x float>* %t160 + store <4 x float> %t186, <4 x float>* %t163 + store <4 x float> %t187, <4 x float>* %t166 + store <4 x float> %t188, <4 x float>* %t169 + %t189 = fadd <4 x float> %t139, %t55 ; <<4 x float>> [#uses=1] + %t190 = fadd <4 x float> %t141, %t55 ; <<4 x float>> [#uses=1] + %t191 = fadd <4 x float> %t142, %t55 ; <<4 x float>> [#uses=1] + %t192 = icmp sgt i64 %t171, 15 ; <i1> [#uses=1] + %t193 = add i64 %t138, 1 ; <i64> [#uses=1] + br i1 %t192, label %bb137, label %bb119 + +bb194: ; preds = %bb119, %bb118, %bb112 + %t195 = phi i64 [ %t116, %bb112 ], [ %t171, %bb119 ], [ %t24, %bb118 ] ; <i64> [#uses=2] + %t196 = phi <4 x float> [ %t115, %bb112 ], [ %t185, %bb119 ], [ %t50, %bb118 ] ; <<4 x float>> [#uses=1] + %t197 = phi float* [ %t114, %bb112 ], [ %t121, %bb119 ], [ %t25, %bb118 ] ; <float*> [#uses=1] + %t198 = phi float* [ %t113, %bb112 ], [ %t120, %bb119 ], [ %t26, %bb118 ] ; <float*> [#uses=1] + %t199 = extractelement <4 x float> %t196, i32 0 ; <float> [#uses=2] + 
%t200 = icmp sgt i64 %t195, 0 ; <i1> [#uses=1] + br i1 %t200, label %bb201, label %bb211 + +bb201: ; preds = %bb201, %bb194 + %t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3] + %t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2] + %t204 = getelementptr float* %t198, i64 %t202 ; <float*> [#uses=1] + %t205 = getelementptr float* %t197, i64 %t202 ; <float*> [#uses=1] + %t206 = load float* %t204 ; <float> [#uses=1] + %t207 = fmul float %t203, %t206 ; <float> [#uses=1] + store float %t207, float* %t205 + %t208 = fadd float %t203, %t8 ; <float> [#uses=2] + %t209 = add i64 %t202, 1 ; <i64> [#uses=2] + %t210 = icmp eq i64 %t209, %t195 ; <i1> [#uses=1] + br i1 %t210, label %bb211, label %bb201 + +bb211: ; preds = %bb201, %bb194 + %t212 = phi float [ %t199, %bb194 ], [ %t208, %bb201 ] ; <float> [#uses=1] + store float %t212, float* %arg2 + ret void + +bb213: ; preds = %bb + ret void +} diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll index 5a3ae77caae1..7bc31bec163d 100644 --- a/test/CodeGen/X86/memcpy.ll +++ b/test/CodeGen/X86/memcpy.ll @@ -1,17 +1,57 @@ -; RUN: llc < %s -march=x86-64 | grep call.*memcpy | count 2 +; RUN: llc < %s -march=x86-64 | FileCheck %s -declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind -define i8* @my_memcpy(i8* %a, i8* %b, i64 %n) nounwind { + +; Variable memcpy's should lower to calls. +define i8* @test1(i8* %a, i8* %b, i64 %n) nounwind { entry: - tail call void @llvm.memcpy.i64( i8* %a, i8* %b, i64 %n, i32 1 ) + tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 ) ret i8* %a + +; CHECK: test1: +; CHECK: memcpy } -define i8* @my_memcpy2(i64* %a, i64* %b, i64 %n) nounwind { +; Variable memcpy's should lower to calls. 
+define i8* @test2(i64* %a, i64* %b, i64 %n) nounwind { entry: %tmp14 = bitcast i64* %a to i8* %tmp25 = bitcast i64* %b to i8* - tail call void @llvm.memcpy.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8 ) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 ) ret i8* %tmp14 + +; CHECK: test2: +; CHECK: memcpy +} + +; Large constant memcpy's should lower to a call when optimizing for size. +; PR6623 +define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) + ret void +; CHECK: test3: +; CHECK: memcpy } + +; Large constant memcpy's should be inlined when not optimizing for size. +define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) + ret void +; CHECK: test4: +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +; CHECK: movq +} + diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll index bbe6b2341e58..0493edc8d090 100644 --- a/test/CodeGen/X86/object-size.ll +++ b/test/CodeGen/X86/object-size.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; RUN: llc -O0 -regalloc=linearscan < %s -march=x86-64 | FileCheck %s -check-prefix=X64 ; ModuleID = 'ts.c' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll index bf8bfa28dafd..f1e3c2772ac9 100644 --- a/test/CodeGen/X86/optimize-max-3.ll +++ b/test/CodeGen/X86/optimize-max-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck 
%s ; LSR's OptimizeMax should eliminate the select (max). @@ -30,3 +30,47 @@ for.body: ; preds = %for.body.preheader, for.end: ; preds = %for.body, %entry ret void } + +; In this case, one of the max operands is another max, which folds, +; leaving a two-operand max which doesn't fit the usual pattern. +; OptimizeMax should handle this case. +; PR7454 + +; CHECK: _Z18GenerateStatusPagei: + +; CHECK: jle +; CHECK-NOT: cmov +; CHECK: xorl %edi, %edi +; CHECK-NEXT: align +; CHECK-NEXT: BB1_2: +; CHECK-NEXT: callq +; CHECK-NEXT: incl %ebx +; CHECK-NEXT: cmpl %r14d, %ebx +; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: jl + +define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind { +entry: + %cmp.i = icmp sgt i32 %jobs_to_display, 0 ; <i1> [#uses=1] + %tmp = select i1 %cmp.i, i32 %jobs_to_display, i32 0 ; <i32> [#uses=3] + %cmp8 = icmp sgt i32 %tmp, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %bb.nph, label %for.end + +bb.nph: ; preds = %entry + %tmp11 = icmp sgt i32 %tmp, 1 ; <i1> [#uses=1] + %smax = select i1 %tmp11, i32 %tmp, i32 1 ; <i32> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body, %bb.nph + %i.010 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] ; <i32> [#uses=1] + %it.0.09 = phi float* [ null, %bb.nph ], [ %call.i, %for.body ] ; <float*> [#uses=1] + %call.i = call float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float* %it.0.09) ; <float*> [#uses=1] + %inc = add nsw i32 %i.010, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %inc, %smax ; <i1> [#uses=1] + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float* @_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base(float*) diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll index 045841e7245b..8b9ea17c4e23 100644 --- a/test/CodeGen/X86/phys-reg-local-regalloc.ll +++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 
-mtriple=i386-apple-darwin9 -regalloc=local | FileCheck %s -; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=local | FileCheck %s +; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s +; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s ; CHECKed instructions should be the same with or without -O0. @.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1] diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll index 9506c9b5db11..a1a9759dd36c 100644 --- a/test/CodeGen/X86/pic.ll +++ b/test/CodeGen/X86/pic.ll @@ -78,8 +78,8 @@ entry: ; LINUX: call .L3$pb ; LINUX-NEXT: .L3$pb: ; LINUX: popl -; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), -; LINUX: movl pfoo@GOT(%esi), +; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]] +; LINUX: movl pfoo@GOT(%[[REG3]]), ; LINUX: call afoo@PLT ; LINUX: call * } @@ -189,7 +189,7 @@ bb12: ; LINUX: call .L7$pb ; LINUX: .L7$pb: ; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb), -; LINUX: addl .LJTI7_0@GOTOFF( +; LINUX: .LJTI7_0@GOTOFF( ; LINUX: jmpl * ; LINUX: .LJTI7_0: diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll index 27047dfdfd8c..e5daf5da9f3e 100644 --- a/test/CodeGen/X86/pr2659.ll +++ b/test/CodeGen/X86/pr2659.ll @@ -17,7 +17,7 @@ forcond.preheader: ; preds = %entry ; CHECK: %forcond.preheader.forbody_crit_edge ; CHECK: movl $1 ; CHECK-NOT: xorl -; CHECK-NEXT: movl $1 +; CHECK-NEXT: movl ifthen: ; preds = %entry ret i32 0 diff --git a/test/CodeGen/X86/promote-assert-zext.ll b/test/CodeGen/X86/promote-assert-zext.ll new file mode 100644 index 000000000000..b582806c96a4 --- /dev/null +++ b/test/CodeGen/X86/promote-assert-zext.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s | FileCheck %s +; rdar://8051990 + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin11" + +; ISel doesn't yet know how to eliminate this extra zero-extend. But until +; it knows how to do so safely, it shouldn't eliminate it. +; CHECK: movzbl (%rdi), %eax +; CHECK: movzwl %ax, %eax + +define i64 @_ZL5matchPKtPKhiR9MatchData(i8* %tmp13) nounwind { +entry: + %tmp14 = load i8* %tmp13, align 1 + %tmp17 = zext i8 %tmp14 to i16 + br label %bb341 + +bb341: + %tmp18 = add i16 %tmp17, -1 + %tmp23 = sext i16 %tmp18 to i64 + ret i64 %tmp23 +} diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll index 872817fd4953..48ca36ca9813 100644 --- a/test/CodeGen/X86/shift-folding.ll +++ b/test/CodeGen/X86/shift-folding.ll @@ -21,3 +21,8 @@ define i32* @test3(i32* %P, i32 %X) { ret i32* %P2 } +define fastcc i32 @test4(i32* %d) nounwind { + %tmp4 = load i32* %d + %tmp512 = lshr i32 %tmp4, 24 + ret i32 %tmp512 +} diff --git a/test/CodeGen/X86/sibcall-3.ll b/test/CodeGen/X86/sibcall-3.ll new file mode 100644 index 000000000000..f0d66cf7b696 --- /dev/null +++ b/test/CodeGen/X86/sibcall-3.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s +; PR7193 + +define void @t1(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind { +; CHECK: t1: +; CHECK: call 0 + tail call void null(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind + ret void +} + +define void @t2(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind { +; CHECK: t2: +; CHECK: jmpl + tail call void null(i8* inreg %dst, i8* inreg %src) nounwind + ret void +} diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 031c01e9af7d..acba5288c0d1 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -44,26 +44,33 @@ return: ; Sink instructions with dead EFLAGS defs.
-; CHECK: zzz: -; CHECK: je -; CHECK-NEXT: orb - -define zeroext i8 @zzz(i8 zeroext %a, i8 zeroext %b) nounwind readnone { -entry: - %tmp = zext i8 %a to i32 ; <i32> [#uses=1] - %tmp2 = icmp eq i8 %a, 0 ; <i1> [#uses=1] - %tmp3 = or i8 %b, -128 ; <i8> [#uses=1] - %tmp4 = and i8 %b, 127 ; <i8> [#uses=1] - %b_addr.0 = select i1 %tmp2, i8 %tmp4, i8 %tmp3 ; <i8> [#uses=1] - ret i8 %b_addr.0 -} +; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag. +; +; See <rdar://problem/8030636>. This test isn't valid after we made machine +; sinking more conservative about sinking instructions that define a preg into a +; block when we don't know if the preg is killed within the current block. + + +; FIXMEHECK: zzz: +; FIXMEHECK: je +; FIXMEHECK-NEXT: orb + +; define zeroext i8 @zzz(i8 zeroext %a, i8 zeroext %b) nounwind readnone { +; entry: +; %tmp = zext i8 %a to i32 ; <i32> [#uses=1] +; %tmp2 = icmp eq i8 %a, 0 ; <i1> [#uses=1] +; %tmp3 = or i8 %b, -128 ; <i8> [#uses=1] +; %tmp4 = and i8 %b, 127 ; <i8> [#uses=1] +; %b_addr.0 = select i1 %tmp2, i8 %tmp4, i8 %tmp3 ; <i8> [#uses=1] +; ret i8 %b_addr.0 +; } ; Codegen should hoist and CSE these constants. ; CHECK: vv: -; CHECK: LCPI3_0(%rip), %xmm0 -; CHECK: LCPI3_1(%rip), %xmm1 -; CHECK: LCPI3_2(%rip), %xmm2 +; CHECK: LCPI2_0(%rip), %xmm0 +; CHECK: LCPI2_1(%rip), %xmm1 +; CHECK: LCPI2_2(%rip), %xmm2 ; CHECK: align ; CHECK-NOT: LCPI ; CHECK: ret diff --git a/test/CodeGen/X86/sse-commute.ll b/test/CodeGen/X86/sse-commute.ll new file mode 100644 index 000000000000..38ed644e952b --- /dev/null +++ b/test/CodeGen/X86/sse-commute.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86-64 < %s | FileCheck %s + +; Commute the comparison to avoid a move. +; PR7500. 
+ +; CHECK: a: +; CHECK-NOT: mov +; CHECK: pcmpeqd +define <2 x double> @a(<2 x double>, <2 x double>) nounwind readnone { +entry: + %tmp6 = bitcast <2 x double> %0 to <4 x i32> ; <<4 x i32>> [#uses=2] + %tmp4 = bitcast <2 x double> %1 to <4 x i32> ; <<4 x i32>> [#uses=1] + %cmp = icmp eq <4 x i32> %tmp6, %tmp4 ; <<4 x i1>> [#uses=1] + %sext = sext <4 x i1> %cmp to <4 x i32> ; <<4 x i32>> [#uses=1] + %and = and <4 x i32> %tmp6, %sext ; <<4 x i32>> [#uses=1] + %tmp8 = bitcast <4 x i32> %and to <2 x double> ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp8 +} + + diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 19fbed015b2f..d265bd7366d4 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -44,15 +44,15 @@ define double @olt(double %x, double %y) nounwind { ; CHECK: ogt_inverse: ; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ogt_inverse: ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @ogt_inverse(double %x, double %y) nounwind { %c = fcmp ogt double %x, %y @@ -62,15 +62,15 @@ define double @ogt_inverse(double %x, double %y) nounwind { ; CHECK: olt_inverse: ; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: olt_inverse: ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: olt_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @olt_inverse(double %x, double %y) nounwind { %c = fcmp olt 
double %x, %y @@ -108,11 +108,11 @@ define double @ole(double %x, double %y) nounwind { ; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; UNSAFE: oge_inverse: ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @oge_inverse(double %x, double %y) nounwind { %c = fcmp oge double %x, %y @@ -124,11 +124,11 @@ define double @oge_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: ucomisd %xmm0, %xmm1 ; UNSAFE: ole_inverse: ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @ole_inverse(double %x, double %y) nounwind { %c = fcmp ole double %x, %y @@ -175,17 +175,17 @@ define double @x_olt(double %x) nounwind { ; CHECK: x_ogt_inverse: ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: x_ogt_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_ogt_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_ogt_inverse(double %x) nounwind { %c = fcmp ogt double %x, 0.000000e+00 @@ -196,17 +196,17 @@ define double @x_ogt_inverse(double %x) nounwind { ; CHECK: x_olt_inverse: ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; 
CHECK-NEXT: ret ; UNSAFE: x_olt_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_olt_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_olt_inverse(double %x) nounwind { %c = fcmp olt double %x, 0.000000e+00 @@ -251,12 +251,12 @@ define double @x_ole(double %x) nounwind { ; UNSAFE: x_oge_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_oge_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_oge_inverse(double %x) nounwind { %c = fcmp oge double %x, 0.000000e+00 @@ -269,12 +269,12 @@ define double @x_oge_inverse(double %x) nounwind { ; UNSAFE: x_ole_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_ole_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_ole_inverse(double %x) nounwind { %c = fcmp ole double %x, 0.000000e+00 @@ -314,11 +314,11 @@ define double @ult(double %x, double %y) nounwind { ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ugt_inverse: ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double 
@ugt_inverse(double %x, double %y) nounwind { %c = fcmp ugt double %x, %y @@ -330,11 +330,11 @@ define double @ugt_inverse(double %x, double %y) nounwind { ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: ult_inverse: ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @ult_inverse(double %x, double %y) nounwind { %c = fcmp ult double %x, %y @@ -344,7 +344,7 @@ define double @ult_inverse(double %x, double %y) nounwind { ; CHECK: uge: ; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: uge: ; UNSAFE-NEXT: maxsd %xmm1, %xmm0 @@ -360,7 +360,7 @@ define double @uge(double %x, double %y) nounwind { ; CHECK: ule: ; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ule: ; UNSAFE-NEXT: minsd %xmm1, %xmm0 @@ -379,11 +379,11 @@ define double @ule(double %x, double %y) nounwind { ; CHECK-NEXT: ret ; UNSAFE: uge_inverse: ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @uge_inverse(double %x, double %y) nounwind { %c = fcmp uge double %x, %y @@ -396,11 +396,11 @@ define double @uge_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: ret ; UNSAFE: ule_inverse: ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 
; FINITE-NEXT: ret define double @ule_inverse(double %x, double %y) nounwind { %c = fcmp ule double %x, %y @@ -445,12 +445,12 @@ define double @x_ult(double %x) nounwind { ; UNSAFE: x_ugt_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_ugt_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_ugt_inverse(double %x) nounwind { %c = fcmp ugt double %x, 0.000000e+00 @@ -463,12 +463,12 @@ define double @x_ugt_inverse(double %x) nounwind { ; UNSAFE: x_ult_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_ult_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_ult_inverse(double %x) nounwind { %c = fcmp ult double %x, 0.000000e+00 @@ -479,7 +479,7 @@ define double @x_ult_inverse(double %x) nounwind { ; CHECK: x_uge: ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: x_uge: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 @@ -498,7 +498,7 @@ define double @x_uge(double %x) nounwind { ; CHECK: x_ule: ; CHECK-NEXT: pxor %xmm1, %xmm1 ; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: x_ule: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 @@ -521,12 +521,12 @@ define double @x_ule(double %x) nounwind { ; UNSAFE: x_uge_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, 
%xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_uge_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_uge_inverse(double %x) nounwind { %c = fcmp uge double %x, 0.000000e+00 @@ -541,12 +541,12 @@ define double @x_uge_inverse(double %x) nounwind { ; UNSAFE: x_ule_inverse: ; UNSAFE-NEXT: pxor %xmm1, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: x_ule_inverse: ; FINITE-NEXT: pxor %xmm1, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @x_ule_inverse(double %x) nounwind { %c = fcmp ule double %x, 0.000000e+00 @@ -587,17 +587,17 @@ define double @y_olt(double %x) nounwind { ; CHECK: y_ogt_inverse: ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 ; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: y_ogt_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_ogt_inverse: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_ogt_inverse(double %x) nounwind { %c = fcmp ogt double %x, -0.000000e+00 @@ -608,17 +608,17 @@ define double @y_ogt_inverse(double %x) nounwind { ; CHECK: y_olt_inverse: ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 ; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: y_olt_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: 
movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_olt_inverse: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_olt_inverse(double %x) nounwind { %c = fcmp olt double %x, -0.000000e+00 @@ -659,12 +659,12 @@ define double @y_ole(double %x) nounwind { ; UNSAFE: y_oge_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_oge_inverse: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_oge_inverse(double %x) nounwind { %c = fcmp oge double %x, -0.000000e+00 @@ -677,12 +677,12 @@ define double @y_oge_inverse(double %x) nounwind { ; UNSAFE: y_ole_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_ole_inverse: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_ole_inverse(double %x) nounwind { %c = fcmp ole double %x, -0.000000e+00 @@ -723,12 +723,12 @@ define double @y_ult(double %x) nounwind { ; UNSAFE: y_ugt_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_ugt_inverse: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_ugt_inverse(double %x) nounwind { %c = fcmp ugt double %x, -0.000000e+00 @@ -741,12 +741,12 @@ 
define double @y_ugt_inverse(double %x) nounwind { ; UNSAFE: y_ult_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_ult_inverse: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_ult_inverse(double %x) nounwind { %c = fcmp ult double %x, -0.000000e+00 @@ -757,7 +757,7 @@ define double @y_ult_inverse(double %x) nounwind { ; CHECK: y_uge: ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 ; CHECK-NEXT: maxsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: y_uge: ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 @@ -774,7 +774,7 @@ define double @y_uge(double %x) nounwind { ; CHECK: y_ule: ; CHECK-NEXT: movsd {{[^,]*}}, %xmm1 ; CHECK-NEXT: minsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: y_ule: ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 @@ -794,12 +794,12 @@ define double @y_ule(double %x) nounwind { ; UNSAFE: y_uge_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_uge_inverse: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 ; FINITE-NEXT: minsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_uge_inverse(double %x) nounwind { %c = fcmp uge double %x, -0.000000e+00 @@ -813,12 +813,12 @@ define double @y_uge_inverse(double %x) nounwind { ; UNSAFE: y_ule_inverse: ; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movapd %xmm1, %xmm0 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: y_ule_inverse: ; FINITE-NEXT: movsd 
{{[^,]*}}, %xmm1 ; FINITE-NEXT: maxsd %xmm0, %xmm1 -; FINITE-NEXT: movapd %xmm1, %xmm0 +; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; FINITE-NEXT: ret define double @y_ule_inverse(double %x) nounwind { %c = fcmp ule double %x, -0.000000e+00 diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index b969ecb41420..206cdff1ba7d 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -63,10 +63,10 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind { ret <8 x i16> %tmp ; X64: t4: ; X64: pextrw $7, %xmm0, %eax -; X64: pshufhw $100, %xmm0, %xmm2 -; X64: pinsrw $1, %eax, %xmm2 +; X64: pshufhw $100, %xmm0, %xmm1 +; X64: pinsrw $1, %eax, %xmm1 ; X64: pextrw $1, %xmm0, %eax -; X64: movdqa %xmm2, %xmm0 +; X64: movdqa %xmm1, %xmm0 ; X64: pinsrw $4, %eax, %xmm0 ; X64: ret } diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll index 271ad1aad0ba..8ca0b12b547f 100644 --- a/test/CodeGen/X86/stack-align.ll +++ b/test/CodeGen/X86/stack-align.ll @@ -9,14 +9,15 @@ target triple = "i686-apple-darwin8" define void @test({ double, double }* byval %z, double* %P) { entry: + %tmp3 = load double* @G, align 16 ; <double> [#uses=1] + %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1] + volatile store double %tmp4, double* %P %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1] - %tmp1 = load double* %tmp, align 8 ; <double> [#uses=1] + %tmp1 = volatile load double* %tmp, align 8 ; <double> [#uses=1] %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1] ; CHECK: andpd{{.*}}4(%esp), %xmm - %tmp3 = load double* @G, align 16 ; <double> [#uses=1] - %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1] %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1] - store double %tmp6, double* %P, align 8 + volatile store double %tmp6, double* %P, align 8 ret void } diff --git a/test/CodeGen/X86/stack-protector-linux.ll b/test/CodeGen/X86/stack-protector-linux.ll new file mode 
100644 index 000000000000..fe2a9c5d57a1 --- /dev/null +++ b/test/CodeGen/X86/stack-protector-linux.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs: +; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs: +; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs: +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_guard} +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_fail} + +@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1] + +define void @test(i8* %a) nounwind ssp { +entry: + %a_addr = alloca i8* ; <i8**> [#uses=2] + %buf = alloca [8 x i8] ; <[8 x i8]*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %a, i8** %a_addr + %buf1 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] + %0 = load i8** %a_addr, align 4 ; <i8*> [#uses=1] + %1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind ; <i8*> [#uses=0] + %buf2 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1] + %2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0] + br label %return + +return: ; preds = %entry + ret void +} + +declare i8* @strcpy(i8*, i8*) nounwind + +declare i32 @printf(i8*, ...) 
nounwind diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index b1100fa960c0..5682e7caf8bd 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -67,7 +67,7 @@ entry: ; X64: movw %si, 2(%rdi) ; X32: test4: -; X32: movzwl 8(%esp), %eax +; X32: movl 8(%esp), %eax ; X32: movw %ax, 2(%{{.*}}) } diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll new file mode 100644 index 000000000000..ed3266ec422b --- /dev/null +++ b/test/CodeGen/X86/switch-bt.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=x86-64 -asm-verbose=false < %s | FileCheck %s + +; This switch should use bit tests, and the third bit test case is just +; testing for one possible value, so it doesn't need a bt. + +; CHECK: movabsq $2305843009482129440, %r +; CHECK-NEXT: btq %rax, %r +; CHECK-NEXT: jb +; CHECK-NEXT: movl $671088640, %e +; CHECK-NEXT: btq %rax, %r +; CHECK-NEXT: jb +; CHECK-NEXT: testq %rax, %r +; CHECK-NEXT: j + +define void @test(i8* %l) nounwind { +entry: + %l.addr = alloca i8*, align 8 ; <i8**> [#uses=2] + store i8* %l, i8** %l.addr + %tmp = load i8** %l.addr ; <i8*> [#uses=1] + %tmp1 = load i8* %tmp ; <i8> [#uses=1] + %conv = sext i8 %tmp1 to i32 ; <i32> [#uses=1] + switch i32 %conv, label %sw.default [ + i32 62, label %sw.bb + i32 60, label %sw.bb + i32 38, label %sw.bb2 + i32 94, label %sw.bb2 + i32 61, label %sw.bb2 + i32 33, label %sw.bb4 + ] + +sw.bb: ; preds = %entry, %entry + call void @foo(i32 0) + br label %sw.epilog + +sw.bb2: ; preds = %entry, %entry, %entry + call void @foo(i32 1) + br label %sw.epilog + +sw.bb4: ; preds = %entry + call void @foo(i32 3) + br label %sw.epilog + +sw.default: ; preds = %entry + call void @foo(i32 97) + br label %sw.epilog + +sw.epilog: ; preds = %sw.default, %sw.bb4, %sw.bb2, %sw.bb + ret void +} + +declare void @foo(i32) diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll index d05dff8928ba..107bdf9de3e7 100644 --- 
a/test/CodeGen/X86/tailcallstack64.ll +++ b/test/CodeGen/X86/tailcallstack64.ll @@ -2,9 +2,11 @@ ; Check that lowered arguments on the stack do not overwrite each other. ; Add %in1 %p1 to a different temporary register (%eax). -; CHECK: movl %edi, %eax +; CHECK: movl 32(%rsp), %eax ; Move param %in1 to temp register (%r10d). ; CHECK: movl 40(%rsp), %r10d +; Add %in1 %p1 to a different temporary register (%eax). +; CHECK: addl %edi, %eax ; Move param %in2 to stack. ; CHECK: movl %r10d, 32(%rsp) ; Move result of addition to stack. diff --git a/test/CodeGen/X86/tls-1.ll b/test/CodeGen/X86/tls-1.ll index 5f6cbe09fcf7..de694d8d471f 100644 --- a/test/CodeGen/X86/tls-1.ll +++ b/test/CodeGen/X86/tls-1.ll @@ -7,13 +7,13 @@ ; CHECK: .section __DATA,__thread_vars,thread_local_variables ; CHECK: .globl _a ; CHECK: _a: -; CHECK: .quad ___tlv_bootstrap +; CHECK: .quad __tlv_bootstrap ; CHECK: .quad 0 ; CHECK: .quad _a$tlv$init ; CHECK: .tbss _b$tlv$init, 4, 2 ; CHECK: .globl _b ; CHECK: _b: -; CHECK: .quad ___tlv_bootstrap +; CHECK: .quad __tlv_bootstrap ; CHECK: .quad 0 ; CHECK: .quad _b$tlv$init diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll new file mode 100644 index 000000000000..3bee7007749c --- /dev/null +++ b/test/CodeGen/X86/v-binop-widen.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s +; CHECK: divss +; CHECK: divps +; CHECK: divps + +%vec = type <9 x float> +define %vec @vecdiv( %vec %p1, %vec %p2) +{ + %result = fdiv %vec %p1, %p2 + ret %vec %result +} + diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll new file mode 100644 index 000000000000..ae3f55a316fa --- /dev/null +++ b/test/CodeGen/X86/v-binop-widen2.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s + +%vec = type <6 x float> +; CHECK: divss +; CHECK: divss +; CHECK: divps +define %vec @vecdiv( %vec %p1, %vec %p2) +{ + %result = fdiv %vec %p1, %p2 + ret %vec %result +} + +@a = constant %vec 
< float 2.0, float 4.0, float 8.0, float 16.0, float 32.0, float 64.0 > +@b = constant %vec < float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0 > + +; Expected result: < 1.0, 2.0, 4.0, ..., 2.0^(n-1) > +; main() returns 0 if the result is expected and 1 otherwise +; to execute, use llvm-as < %s | lli +define i32 @main() nounwind { +entry: + %avec = load %vec* @a + %bvec = load %vec* @b + + %res = call %vec @vecdiv(%vec %avec, %vec %bvec) + br label %loop +loop: + %idx = phi i32 [0, %entry], [%nextInd, %looptail] + %expected = phi float [1.0, %entry], [%nextExpected, %looptail] + %elem = extractelement %vec %res, i32 %idx + %expcmp = fcmp oeq float %elem, %expected + br i1 %expcmp, label %looptail, label %return +looptail: + %nextExpected = fmul float %expected, 2.0 + %nextInd = add i32 %idx, 1 + %cmp = icmp slt i32 %nextInd, 6 + br i1 %cmp, label %loop, label %return +return: + %retval = phi i32 [0, %looptail], [1, %loop] + ret i32 %retval +} diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll new file mode 100644 index 000000000000..9c4b773a6190 --- /dev/null +++ b/test/CodeGen/X86/v2f32.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -march=x86-64 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mcpu=yonah -march=x86 -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32 + +; PR7518 +define void @test1(<2 x float> %Q, float *%P2) nounwind { + %a = extractelement <2 x float> %Q, i32 0 + %b = extractelement <2 x float> %Q, i32 1 + %c = fadd float %a, %b + + store float %c, float* %P2 + ret void +; X64: test1: +; X64-NEXT: addss %xmm1, %xmm0 +; X64-NEXT: movss %xmm0, (%rdi) +; X64-NEXT: ret + +; X32: test1: +; X32-NEXT: movss 4(%esp), %xmm0 +; X32-NEXT: addss 8(%esp), %xmm0 +; X32-NEXT: movl 12(%esp), %eax +; X32-NEXT: movss %xmm0, (%eax) +; X32-NEXT: ret +} + + +define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind { + %Z = fadd <2 x float> %Q, %R + ret <2 x float> %Z + +; X64: test2: 
+; X64-NEXT: insertps $0 +; X64-NEXT: insertps $16 +; X64-NEXT: insertps $0 +; X64-NEXT: insertps $16 +; X64-NEXT: addps +; X64-NEXT: movaps +; X64-NEXT: pshufd +; X64-NEXT: ret +} diff --git a/test/CodeGen/X86/vec-trunc-store.ll b/test/CodeGen/X86/vec-trunc-store.ll index ea1a151a5916..2f57d7b571f0 100644 --- a/test/CodeGen/X86/vec-trunc-store.ll +++ b/test/CodeGen/X86/vec-trunc-store.ll @@ -1,13 +1,15 @@ -; RUN: llc < %s -march=x86-64 -disable-mmx | grep punpcklwd | count 2 +; RUN: llc < %s -march=x86-64 -disable-mmx >/dev/null -define void @foo() nounwind { - %cti69 = trunc <8 x i32> undef to <8 x i16> ; <<8 x i16>> [#uses=1] +define void @foo(<8 x i32>* %p) nounwind { + %t = load <8 x i32>* %p + %cti69 = trunc <8 x i32> %t to <8 x i16> ; <<8 x i16>> [#uses=1] store <8 x i16> %cti69, <8 x i16>* undef ret void } -define void @bar() nounwind { - %cti44 = trunc <4 x i32> undef to <4 x i16> ; <<4 x i16>> [#uses=1] +define void @bar(<4 x i32>* %p) nounwind { + %t = load <4 x i32>* %p + %cti44 = trunc <4 x i32> %t to <4 x i16> ; <<4 x i16>> [#uses=1] store <4 x i16> %cti44, <4 x i16>* undef ret void } diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll index f034b0aa7102..28fd59b29dd3 100644 --- a/test/CodeGen/X86/vec_shuffle-6.ll +++ b/test/CodeGen/X86/vec_shuffle-6.ll @@ -4,7 +4,7 @@ ; RUN: grep movups %t | count 2 target triple = "i686-apple-darwin" -@x = global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ] ; <[4 x i32]*> [#uses=4] +@x = external global [4 x i32] define <2 x i64> @test1() { %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/vector-intrinsics.ll b/test/CodeGen/X86/vector-intrinsics.ll index edf58b9da111..cabacb572cea 100644 --- a/test/CodeGen/X86/vector-intrinsics.ll +++ b/test/CodeGen/X86/vector-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep call | count 16 +; RUN: llc < %s -march=x86-64 | grep call | count 43 declare <4 x double> 
@llvm.sin.v4f64(<4 x double> %p) declare <4 x double> @llvm.cos.v4f64(<4 x double> %p) @@ -25,3 +25,28 @@ define <4 x double> @zoo(<4 x double> %p, i32 %q) %t = call <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32 %q) ret <4 x double> %t } + + +declare <9 x double> @llvm.exp.v9f64(<9 x double> %a) +declare <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b) +declare <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32) + +define void @a(<9 x double>* %p) nounwind { + %a = load <9 x double>* %p + %r = call <9 x double> @llvm.exp.v9f64(<9 x double> %a) + store <9 x double> %r, <9 x double>* %p + ret void +} +define void @b(<9 x double>* %p, <9 x double>* %q) nounwind { + %a = load <9 x double>* %p + %b = load <9 x double>* %q + %r = call <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b) + store <9 x double> %r, <9 x double>* %p + ret void +} +define void @c(<9 x double>* %p, i32 %n) nounwind { + %a = load <9 x double>* %p + %r = call <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32 %n) + store <9 x double> %r, <9 x double>* %p + ret void +} diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll index 5e1e0c858e65..2e5742afdf85 100644 --- a/test/CodeGen/X86/volatile.ll +++ b/test/CodeGen/X86/volatile.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=sse2 | grep movsd | count 5 -; RUN: llc < %s -march=x86 -mattr=sse2 -O0 | grep movsd | count 5 +; RUN: llc < %s -march=x86 -mattr=sse2 -O0 | grep -v esp | grep movsd | count 5 @x = external global double diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 47dba4b4a04b..25dde57c767e 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -1,13 +1,46 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s -; CHECK: insertps -; CHECK: extractps ; widening shuffle v3float and then a add - define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { entry: +; 
CHECK: insertps +; CHECK: extractps %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2> %val = fadd <3 x float> %x, %src2 store <3 x float> %val, <3 x float>* %dst.addr ret void } + + +; widening shuffle v3float with a different mask and then a add +define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { +entry: +; CHECK: insertps +; CHECK: extractps + %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> + %val = fadd <3 x float> %x, %src2 + store <3 x float> %val, <3 x float>* %dst.addr + ret void +} + +; Example of when widening a v3float operation causes the DAG to replace a node +; with the operation that we are currently widening, i.e. when replacing +; opA with opB, the DAG will produce new operations with opA. +define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) { +entry: +; CHECK: pshufd + %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %tmp3.i13 = shufflevector <4 x float> %tmp1.i.i, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ; <<3 x float>> + %tmp6.i14 = shufflevector <3 x float> %tmp3.i13, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %tmp97.i = shufflevector <4 x float> %tmp6.i14, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> + %tmp2.i18 = shufflevector <3 x float> %tmp97.i, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2> + %t5 = bitcast <4 x float> %tmp2.i18 to <4 x i32> + %shr.i.i19 = lshr <4 x i32> %t5, <i32 19, i32 19, i32 19, i32 19> + %and.i.i20 = and <4 x i32> %shr.i.i19, <i32 4080, i32 4080, i32 4080, i32 4080> + %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, 
<4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> + store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst + ret void +} + diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll deleted file mode 100644 index 9374a028631d..000000000000 --- a/test/CodeGen/X86/widen_shuffle-2.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s -; CHECK: insertps -; CHECK: extractps - -; widening shuffle v3float and then a add - -define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { -entry: - %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2> - %val = fadd <3 x float> %x, %src2 - store <3 x float> %val, <3 x float>* %dst.addr - ret void -} diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll new file mode 100644 index 000000000000..8d3b300da3bf --- /dev/null +++ b/test/CodeGen/X86/x86-64-tls-1.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +@tm_nest_level = internal thread_local global i32 0 +define i64 @z() nounwind { +; CHECK: movabsq $tm_nest_level@TPOFF, %rcx + ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100) +} diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll new file mode 100644 index 000000000000..bd109b92d9f7 --- /dev/null +++ b/test/CodeGen/X86/zext-sext.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; <rdar://problem/8006248> + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ([40 x i16]*, i32*, i16**, i64*)* @func to i8*)], section "llvm.metadata" + +define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind { +entry: + %tmp103 = getelementptr inbounds [40 x i16]* %a, i64 0, i64 4 + %tmp104 = load i16* %tmp103, align 2 + %tmp105 = sext i16 %tmp104 to i32 + %tmp106 = load i32* %b, align 4 + %tmp107 = sub nsw i32 4, %tmp106 + %tmp108 = load i16** %c, align 8 + 
%tmp109 = sext i32 %tmp107 to i64 + %tmp110 = getelementptr inbounds i16* %tmp108, i64 %tmp109 + %tmp111 = load i16* %tmp110, align 1 + %tmp112 = sext i16 %tmp111 to i32 + %tmp = mul i32 355244649, %tmp112 + %tmp1 = mul i32 %tmp, %tmp105 + %tmp2 = add i32 %tmp1, 2138875574 + %tmp3 = add i32 %tmp2, 1546991088 + %tmp4 = mul i32 %tmp3, 2122487257 + %tmp5 = icmp sge i32 %tmp4, 2138875574 + %tmp6 = icmp slt i32 %tmp4, -8608074 + %tmp7 = or i1 %tmp5, %tmp6 + %outSign = select i1 %tmp7, i32 1, i32 -1 + %tmp8 = icmp slt i32 %tmp4, 0 + %tmp9 = icmp eq i32 %outSign, 1 + %tmp10 = and i1 %tmp8, %tmp9 + %tmp11 = sext i32 %tmp4 to i64 + %tmp12 = add i64 %tmp11, 5089792279245435153 + +; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z]+]] +; CHECK-NEXT: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%[a-z]+]] +; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%[a-z]+]] +; CHECK-NEXT: subq %r[[REGISTER_zext]], [[REGISTER_sext]] + + %tmp13 = sub i64 %tmp12, 2138875574 + %tmp14 = zext i32 %tmp4 to i64 + %tmp15 = sub i64 %tmp11, %tmp14 + %tmp16 = select i1 %tmp10, i64 %tmp15, i64 0 + %tmp17 = sub i64 %tmp13, %tmp16 + %tmp18 = mul i64 %tmp17, 4540133155013554595 + %tmp19 = sub i64 %tmp18, 5386586244038704851 + %tmp20 = add i64 %tmp19, -1368057358110947217 + %tmp21 = mul i64 %tmp20, -422037402840850817 + %tmp115 = load i64* %d, align 8 + %alphaX = mul i64 468858157810230901, %tmp21 + %alphaXbetaY = add i64 %alphaX, %tmp115 + %transformed = add i64 %alphaXbetaY, 9040145182981852475 + store i64 %transformed, i64* %d, align 8 + ret void +} |