From dadbdfff07596fc3b48cc1e735181b9b8c893f67 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 22 Jan 2016 21:16:09 +0000 Subject: Vendor import of llvm release_38 branch r258549: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258549 --- test/CodeGen/AArch64/cxx-tlscc.ll | 27 +++++++++ test/CodeGen/ARM/cse-flags.ll | 43 ++++++++++++++ test/CodeGen/ARM/cxx-tlscc.ll | 11 ++++ test/CodeGen/ARM/memfunc.ll | 18 +++--- test/CodeGen/X86/2014-05-30-CombineAddNSW.ll | 20 ------- test/CodeGen/X86/cxx_tlscc64.ll | 27 +++++++++ test/CodeGen/X86/x86-shrink-wrap-unwind.ll | 83 ++++++++++++++++++++++++++-- 7 files changed, 194 insertions(+), 35 deletions(-) create mode 100644 test/CodeGen/ARM/cse-flags.ll delete mode 100644 test/CodeGen/X86/2014-05-30-CombineAddNSW.ll (limited to 'test/CodeGen') diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll index a9ae00c8d270..9996c0d3aba8 100644 --- a/test/CodeGen/AArch64/cxx-tlscc.ll +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -8,6 +8,7 @@ @sg = internal thread_local global %struct.S zeroinitializer, align 1 @__dso_handle = external global i8 @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) @@ -74,3 +75,29 @@ __tls_init.exit: ; CHECK-NOT: ldp d27, d26 ; CHECK-NOT: ldp d29, d28 ; CHECK-NOT: ldp d31, d30 + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: stp d31, d30 +; CHECK-NOT: stp d29, d28 +; CHECK-NOT: stp d27, d26 +; CHECK-NOT: stp d25, d24 +; CHECK-NOT: stp d23, d22 +; CHECK-NOT: stp d21, d20 +; CHECK-NOT: stp d19, d18 +; CHECK-NOT: stp d17, d16 +; CHECK-NOT: stp d7, d6 +; CHECK-NOT: stp d5, d4 +; CHECK-NOT: stp d3, d2 +; CHECK-NOT: stp d1, d0 +; CHECK-NOT: stp x20, x19 +; CHECK-NOT: stp x14, x13 +; CHECK-NOT: stp x12, x11 +; CHECK-NOT: stp x10, x9 +; CHECK-NOT: stp x8, x7 +; CHECK-NOT: stp x6, x5 +; CHECK-NOT: stp x4, x3 +; CHECK-NOT: stp x2, x1 +; CHECK: blr +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { + ret i32* @sum1 +} diff --git a/test/CodeGen/ARM/cse-flags.ll b/test/CodeGen/ARM/cse-flags.ll new file mode 100644 index 000000000000..c18e2fcb6039 --- /dev/null +++ b/test/CodeGen/ARM/cse-flags.ll @@ -0,0 +1,43 @@ +; RUN: llc -asm-verbose=false < %s | FileCheck %s +; PR26063 + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabihf" + +; CHECK: .LBB0_1: +; CHECK-NEXT: bl f{{$}} +; CHECK-NEXT: ldrb r[[T0:[0-9]+]], [r{{[0-9]+}}, #1]!{{$}} +; CHECK-NEXT: cmp r{{[0-9]+}}, #1{{$}} +; CHECK-NEXT: cmpne r[[T0]], #0{{$}} +; CHECK-NEXT: bne .LBB0_1{{$}} +define i8* @h(i8* readonly %a, i32 %b, i32 %c) { +entry: + %0 = load i8, i8* %a, align 1 + %tobool4 = icmp ne i8 %0, 0 + %cmp5 = icmp ne i32 %b, 1 + %1 = and i1 %cmp5, %tobool4 + br i1 %1, label %while.body.preheader, label %while.end + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %a.addr.06 = phi i8* [ %incdec.ptr, %while.body ], [ %a, %while.body.preheader ] + %call = tail call i32 bitcast (i32 (...)* @f to i32 ()*)() + %incdec.ptr = getelementptr inbounds i8, i8* %a.addr.06, i32 1 + %2 = load i8, i8* %incdec.ptr, align 1 + %tobool = icmp ne i8 %2, 0 + %cmp = icmp ne i32 %call, 1 + %3 = and i1 %cmp, %tobool + br i1 %3, label %while.body, label %while.end.loopexit + +while.end.loopexit: ; preds = %while.body + %incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %while.body ] + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %a.addr.0.lcssa = phi i8* [ %a, %entry ], [ %incdec.ptr.lcssa, %while.end.loopexit ] + ret i8* %a.addr.0.lcssa +} + +declare i32 @f(...) diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll index 7b776d4b8e88..11173bbb1978 100644 --- a/test/CodeGen/ARM/cxx-tlscc.ll +++ b/test/CodeGen/ARM/cxx-tlscc.ll @@ -8,6 +8,7 @@ @sg = internal thread_local global %struct.S zeroinitializer, align 1 @__dso_handle = external global i8 @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) @@ -44,3 +45,13 @@ __tls_init.exit: ; CHECK-NOT: pop {r9, r12} ; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc} ; CHECK: pop {lr} + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: push {r1, r2, r3, r4, r7, lr} +; CHECK-NOT: push {r9, r12} +; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} +; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7} +; CHECK: blx +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { + ret i32* @sum1 +} diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index 66743f3e9d5e..46fef7629cc4 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS -; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN -; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI -; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI +; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK +; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK define void @f1(i8* %dest, i8* %src) { entry: @@ -402,8 +402,8 @@ entry: ; CHECK: arr1: ; CHECK-IOS: .align 3 ; CHECK-DARWIN: .align 2 -; CHECK-EABI: .align 2 -; CHECK-GNUEABI: .align 2 +; CHECK-EABI-NOT: .align +; CHECK-GNUEABI-NOT: .align ; CHECK: arr2: ; CHECK: {{\.section.+foo,bar}} ; CHECK-NOT: .align diff --git a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll b/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll deleted file mode 100644 index 4580795880ab..000000000000 --- a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s -; CHECK: addl - -; The two additions are the same , but have different flags. -; In theory this code should never be generated by the frontend, but this -; tries to test that two identical instructions with two different flags -; actually generate two different nodes. -; -; Normally the combiner would see this condition without the flags -; and optimize the result of the sub into a register clear -; (the final result would be 0). With the different flags though the combiner -; needs to keep the add + sub nodes, because the two nodes result as different -; nodes and so cannot assume that the subtraction of the two nodes -; generates 0 as result -define i32 @foo(i32 %a, i32 %b) { - %1 = add i32 %a, %b - %2 = add nsw i32 %a, %b - %3 = sub i32 %1, %2 - ret i32 %3 -} diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll index 70fe501040bf..6c8e45e42d15 100644 --- a/test/CodeGen/X86/cxx_tlscc64.ll +++ b/test/CodeGen/X86/cxx_tlscc64.ll @@ -4,11 +4,13 @@ ; tricks similar to AArch64 fast TLS calling convention (r255821). ; Applying tricks on x86-64 similar to r255821. ; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0 %struct.S = type { i8 } @sg = internal thread_local global %struct.S zeroinitializer, align 1 @__dso_handle = external global i8 @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4 declare void @_ZN1SC1Ev(%struct.S*) declare void @_ZN1SD1Ev(%struct.S*) @@ -50,3 +52,28 @@ init.i: __tls_init.exit: ret %struct.S* @sg } + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: pushq %r11 +; CHECK-NOT: pushq %r10 +; CHECK-NOT: pushq %r9 +; CHECK-NOT: pushq %r8 +; CHECK-NOT: pushq %rsi +; CHECK-NOT: pushq %rdx +; CHECK-NOT: pushq %rcx +; CHECK-NOT: pushq %rbx +; CHECK: callq +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { + ret i32* @sum1 +} + +; Make sure at O0 we don't overwrite RBP. +; CHECK-O0-LABEL: _ZTW4sum2 +; CHECK-O0: pushq %rbp +; CHECK-O0: movq %rsp, %rbp +; CHECK-O0-NOT: movq %r{{.*}}, (%rbp) +define cxx_fast_tlscc i32* @_ZTW4sum2() #0 { + ret i32* @sum1 +} + +attributes #0 = { nounwind "no-frame-pointer-elim"="true" } diff --git a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index 7c00f407b1e0..eb87f7101d7c 100644 --- a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -1,11 +1,5 @@ ; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK ; -; This test checks that we do not use shrink-wrapping when -; the function does not have any frame pointer and may unwind. -; This is a workaround for a limitation in the emission of -; the CFI directives, that are not correct in such case. -; PR25614 -; ; Note: This test cannot be merged with the shrink-wrapping tests ; because the booleans set on the command line take precedence on ; the target logic that disable shrink-wrapping. @@ -13,6 +7,12 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "x86_64-apple-macosx" +; This test checks that we do not use shrink-wrapping when +; the function does not have any frame pointer and may unwind. +; This is a workaround for a limitation in the emission of +; the CFI directives, that are not correct in such case. +; PR25614 +; ; No shrink-wrapping should occur here, until the CFI information are fixed. ; CHECK-LABEL: framelessUnwind: ; @@ -151,3 +151,74 @@ false: } attributes #2 = { "no-frame-pointer-elim"="false" nounwind } + + +; Check that we generate correct code for segmented stack. +; We used to emit the code at the entry point of the function +; instead of just before the prologue. +; For now, shrink-wrapping is disabled on segmented stack functions: PR26107. +; +; CHECK-LABEL: segmentedStack: +; CHECK: cmpq +; CHECK-NEXT: ja [[ENTRY_LABEL:LBB[0-9_]+]] +; +; CHECK: callq ___morestack +; CHECK-NEXT: retq +; +; CHECK: [[ENTRY_LABEL]]: +; Prologue +; CHECK: push +; +; In PR26107, we use to drop these two basic blocks, because +; the segmentedStack entry block was jumping directly to +; the place where the prologue is actually needed, which is +; the call to memcmp. +; Then, those two basic blocks did not have any predecessors +; anymore and were removed. +; +; Check if vk1 is null +; CHECK: testq %rdi, %rdi +; CHECK-NEXT: je [[STRINGS_EQUAL:LBB[0-9_]+]] +; +; Check if vk2 is null +; CHECK: testq %rsi, %rsi +; CHECK-NEXT: je [[STRINGS_EQUAL]] +; +; CHECK: [[STRINGS_EQUAL]] +; CHECK-NEXT: popq +define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 { +entry: + %cmp.i = icmp eq i8* %vk1, null + %cmp1.i = icmp eq i8* %vk2, null + %brmerge.i = or i1 %cmp.i, %cmp1.i + %cmp1.mux.i = and i1 %cmp.i, %cmp1.i + br i1 %brmerge.i, label %__go_ptr_strings_equal.exit, label %if.end4.i + +if.end4.i: ; preds = %entry + %tmp = getelementptr inbounds i8, i8* %vk1, i64 8 + %tmp1 = bitcast i8* %tmp to i64* + %tmp2 = load i64, i64* %tmp1, align 8 + %tmp3 = getelementptr inbounds i8, i8* %vk2, i64 8 + %tmp4 = bitcast i8* %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4, align 8 + %cmp.i.i = icmp eq i64 %tmp2, %tmp5 + br i1 %cmp.i.i, label %land.rhs.i.i, label %__go_ptr_strings_equal.exit + +land.rhs.i.i: ; preds = %if.end4.i + %tmp6 = bitcast i8* %vk2 to i8** + %tmp7 = load i8*, i8** %tmp6, align 8 + %tmp8 = bitcast i8* %vk1 to i8** + %tmp9 = load i8*, i8** %tmp8, align 8 + %call.i.i = tail call i32 @memcmp(i8* %tmp9, i8* %tmp7, i64 %tmp2) #5 + %cmp4.i.i = icmp eq i32 %call.i.i, 0 + br label %__go_ptr_strings_equal.exit + +__go_ptr_strings_equal.exit: ; preds = %land.rhs.i.i, %if.end4.i, %entry + %retval.0.i = phi i1 [ %cmp1.mux.i, %entry ], [ false, %if.end4.i ], [ %cmp4.i.i, %land.rhs.i.i ] + ret i1 %retval.0.i +} + +; Function Attrs: nounwind readonly +declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) #5 + +attributes #5 = { nounwind readonly ssp uwtable "split-stack" } -- cgit v1.2.3