From dadbdfff07596fc3b48cc1e735181b9b8c893f67 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Fri, 22 Jan 2016 21:16:09 +0000
Subject: Vendor import of llvm release_38 branch r258549:
 https://llvm.org/svn/llvm-project/llvm/branches/release_38@258549

---
 test/CodeGen/AArch64/cxx-tlscc.ll            | 27 +++++++++
 test/CodeGen/ARM/cse-flags.ll                | 43 ++++++++++++++
 test/CodeGen/ARM/cxx-tlscc.ll                | 11 ++++
 test/CodeGen/ARM/memfunc.ll                  | 18 +++---
 test/CodeGen/X86/2014-05-30-CombineAddNSW.ll | 20 -------
 test/CodeGen/X86/cxx_tlscc64.ll              | 27 +++++++++
 test/CodeGen/X86/x86-shrink-wrap-unwind.ll   | 83 ++++++++++++++++++++++++++--
 7 files changed, 194 insertions(+), 35 deletions(-)
 create mode 100644 test/CodeGen/ARM/cse-flags.ll
 delete mode 100644 test/CodeGen/X86/2014-05-30-CombineAddNSW.ll

(limited to 'test/CodeGen')

diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll
index a9ae00c8d270..9996c0d3aba8 100644
--- a/test/CodeGen/AArch64/cxx-tlscc.ll
+++ b/test/CodeGen/AArch64/cxx-tlscc.ll
@@ -8,6 +8,7 @@
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
 @__dso_handle = external global i8
 @__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
 
 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
 declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
@@ -74,3 +75,29 @@ __tls_init.exit:
 ; CHECK-NOT: ldp d27, d26
 ; CHECK-NOT: ldp d29, d28
 ; CHECK-NOT: ldp d31, d30
+
+; CHECK-LABEL: _ZTW4sum1
+; CHECK-NOT: stp d31, d30
+; CHECK-NOT: stp d29, d28
+; CHECK-NOT: stp d27, d26
+; CHECK-NOT: stp d25, d24
+; CHECK-NOT: stp d23, d22
+; CHECK-NOT: stp d21, d20
+; CHECK-NOT: stp d19, d18
+; CHECK-NOT: stp d17, d16
+; CHECK-NOT: stp d7, d6
+; CHECK-NOT: stp d5, d4
+; CHECK-NOT: stp d3, d2
+; CHECK-NOT: stp d1, d0
+; CHECK-NOT: stp x20, x19
+; CHECK-NOT: stp x14, x13
+; CHECK-NOT: stp x12, x11
+; CHECK-NOT: stp x10, x9
+; CHECK-NOT: stp x8, x7
+; CHECK-NOT: stp x6, x5
+; CHECK-NOT: stp x4, x3
+; CHECK-NOT: stp x2, x1
+; CHECK: blr
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}
diff --git a/test/CodeGen/ARM/cse-flags.ll b/test/CodeGen/ARM/cse-flags.ll
new file mode 100644
index 000000000000..c18e2fcb6039
--- /dev/null
+++ b/test/CodeGen/ARM/cse-flags.ll
@@ -0,0 +1,43 @@
+; RUN: llc -asm-verbose=false < %s | FileCheck %s
+; PR26063
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+; CHECK: .LBB0_1:
+; CHECK-NEXT: bl      f{{$}}
+; CHECK-NEXT: ldrb    r[[T0:[0-9]+]], [r{{[0-9]+}}, #1]!{{$}}
+; CHECK-NEXT: cmp     r{{[0-9]+}}, #1{{$}}
+; CHECK-NEXT: cmpne   r[[T0]], #0{{$}}
+; CHECK-NEXT: bne     .LBB0_1{{$}}
+define i8* @h(i8* readonly %a, i32 %b, i32 %c) {
+entry:
+  %0 = load i8, i8* %a, align 1
+  %tobool4 = icmp ne i8 %0, 0
+  %cmp5 = icmp ne i32 %b, 1
+  %1 = and i1 %cmp5, %tobool4
+  br i1 %1, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %a.addr.06 = phi i8* [ %incdec.ptr, %while.body ], [ %a, %while.body.preheader ]
+  %call = tail call i32 bitcast (i32 (...)* @f to i32 ()*)()
+  %incdec.ptr = getelementptr inbounds i8, i8* %a.addr.06, i32 1
+  %2 = load i8, i8* %incdec.ptr, align 1
+  %tobool = icmp ne i8 %2, 0
+  %cmp = icmp ne i32 %call, 1
+  %3 = and i1 %cmp, %tobool
+  br i1 %3, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  %incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %while.body ]
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %a.addr.0.lcssa = phi i8* [ %a, %entry ], [ %incdec.ptr.lcssa, %while.end.loopexit ]
+  ret i8* %a.addr.0.lcssa
+}
+
+declare i32 @f(...)
diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll
index 7b776d4b8e88..11173bbb1978 100644
--- a/test/CodeGen/ARM/cxx-tlscc.ll
+++ b/test/CodeGen/ARM/cxx-tlscc.ll
@@ -8,6 +8,7 @@
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
 @__dso_handle = external global i8
 @__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
 
 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
 declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
@@ -44,3 +45,13 @@ __tls_init.exit:
 ; CHECK-NOT: pop {r9, r12}
 ; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
 ; CHECK: pop {lr}
+
+; CHECK-LABEL: _ZTW4sum1
+; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
+; CHECK-NOT: push {r9, r12}
+; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK: blx
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 66743f3e9d5e..46fef7629cc4 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS
-; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN
-; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
-; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
-; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
-; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
-; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK
 
 define void @f1(i8* %dest, i8* %src) {
 entry:
@@ -402,8 +402,8 @@ entry:
 ; CHECK: arr1:
 ; CHECK-IOS: .align 3
 ; CHECK-DARWIN: .align 2
-; CHECK-EABI: .align 2
-; CHECK-GNUEABI: .align 2
+; CHECK-EABI-NOT: .align
+; CHECK-GNUEABI-NOT: .align
 ; CHECK: arr2:
 ; CHECK: {{\.section.+foo,bar}}
 ; CHECK-NOT: .align
diff --git a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll b/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll
deleted file mode 100644
index 4580795880ab..000000000000
--- a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-; CHECK: addl
-
-; The two additions are the same , but have different flags.
-; In theory this code should never be generated by the frontend, but this 
-; tries to test that two identical instructions with two different flags
-; actually generate two different nodes.
-;
-; Normally the combiner would see this condition without the flags 
-; and optimize the result of the sub into a register clear
-; (the final result would be 0). With the different flags though the combiner 
-; needs to keep the add + sub nodes, because the two nodes result as different
-; nodes and so cannot assume that the subtraction of the two nodes
-; generates 0 as result
-define i32 @foo(i32 %a, i32 %b) {
-  %1 = add i32 %a, %b
-  %2 = add nsw i32 %a, %b
-  %3 = sub i32 %1, %2
-  ret i32 %3
-}
diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll
index 70fe501040bf..6c8e45e42d15 100644
--- a/test/CodeGen/X86/cxx_tlscc64.ll
+++ b/test/CodeGen/X86/cxx_tlscc64.ll
@@ -4,11 +4,13 @@
 ; tricks similar to AArch64 fast TLS calling convention (r255821).
 ; Applying tricks on x86-64 similar to r255821.
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0
 %struct.S = type { i8 }
 
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
 @__dso_handle = external global i8
 @__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
 
 declare void @_ZN1SC1Ev(%struct.S*)
 declare void @_ZN1SD1Ev(%struct.S*)
@@ -50,3 +52,28 @@ init.i:
 __tls_init.exit:
   ret %struct.S* @sg
 }
+
+; CHECK-LABEL: _ZTW4sum1
+; CHECK-NOT: pushq %r11
+; CHECK-NOT: pushq %r10
+; CHECK-NOT: pushq %r9
+; CHECK-NOT: pushq %r8
+; CHECK-NOT: pushq %rsi
+; CHECK-NOT: pushq %rdx
+; CHECK-NOT: pushq %rcx
+; CHECK-NOT: pushq %rbx
+; CHECK: callq
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}
+
+; Make sure at O0 we don't overwrite RBP.
+; CHECK-O0-LABEL: _ZTW4sum2
+; CHECK-O0: pushq %rbp
+; CHECK-O0: movq %rsp, %rbp
+; CHECK-O0-NOT: movq %r{{.*}}, (%rbp) 
+define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
+  ret i32* @sum1
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
index 7c00f407b1e0..eb87f7101d7c 100644
--- a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
+++ b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
@@ -1,11 +1,5 @@
 ; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK
 ;
-; This test checks that we do not use shrink-wrapping when
-; the function does not have any frame pointer and may unwind.
-; This is a workaround for a limitation in the emission of
-; the CFI directives, that are not correct in such case.
-; PR25614
-;
 ; Note: This test cannot be merged with the shrink-wrapping tests
 ; because the booleans set on the command line take precedence on
 ; the target logic that disable shrink-wrapping.
@@ -13,6 +7,12 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "x86_64-apple-macosx"
 
 
+; This test checks that we do not use shrink-wrapping when
+; the function does not have any frame pointer and may unwind.
+; This is a workaround for a limitation in the emission of
+; the CFI directives, which are not correct in such a case.
+; PR25614
+;
 ; No shrink-wrapping should occur here, until the CFI information are fixed.
 ; CHECK-LABEL: framelessUnwind:
 ;
@@ -151,3 +151,74 @@ false:
 }
 
 attributes #2 = { "no-frame-pointer-elim"="false" nounwind }
+
+
+; Check that we generate correct code for segmented stack.
+; We used to emit the code at the entry point of the function
+; instead of just before the prologue.
+; For now, shrink-wrapping is disabled on segmented stack functions: PR26107.
+;
+; CHECK-LABEL: segmentedStack:
+; CHECK: cmpq
+; CHECK-NEXT: ja [[ENTRY_LABEL:LBB[0-9_]+]]
+;
+; CHECK: callq ___morestack
+; CHECK-NEXT: retq
+;
+; CHECK: [[ENTRY_LABEL]]:
+; Prologue
+; CHECK: push
+;
+; In PR26107, we used to drop these two basic blocks, because
+; the segmentedStack entry block was jumping directly to
+; the place where the prologue is actually needed, which is
+; the call to memcmp.
+; Then, those two basic blocks did not have any predecessors
+; anymore and were removed.
+;
+; Check if vk1 is null
+; CHECK: testq %rdi, %rdi
+; CHECK-NEXT: je [[STRINGS_EQUAL:LBB[0-9_]+]]
+;
+; Check if vk2 is null
+; CHECK: testq %rsi, %rsi
+; CHECK-NEXT:  je [[STRINGS_EQUAL]]
+;
+; CHECK: [[STRINGS_EQUAL]]
+; CHECK-NEXT: popq
+define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 {
+entry:
+  %cmp.i = icmp eq i8* %vk1, null
+  %cmp1.i = icmp eq i8* %vk2, null
+  %brmerge.i = or i1 %cmp.i, %cmp1.i
+  %cmp1.mux.i = and i1 %cmp.i, %cmp1.i
+  br i1 %brmerge.i, label %__go_ptr_strings_equal.exit, label %if.end4.i
+
+if.end4.i:                                        ; preds = %entry
+  %tmp = getelementptr inbounds i8, i8* %vk1, i64 8
+  %tmp1 = bitcast i8* %tmp to i64*
+  %tmp2 = load i64, i64* %tmp1, align 8
+  %tmp3 = getelementptr inbounds i8, i8* %vk2, i64 8
+  %tmp4 = bitcast i8* %tmp3 to i64*
+  %tmp5 = load i64, i64* %tmp4, align 8
+  %cmp.i.i = icmp eq i64 %tmp2, %tmp5
+  br i1 %cmp.i.i, label %land.rhs.i.i, label %__go_ptr_strings_equal.exit
+
+land.rhs.i.i:                                     ; preds = %if.end4.i
+  %tmp6 = bitcast i8* %vk2 to i8**
+  %tmp7 = load i8*, i8** %tmp6, align 8
+  %tmp8 = bitcast i8* %vk1 to i8**
+  %tmp9 = load i8*, i8** %tmp8, align 8
+  %call.i.i = tail call i32 @memcmp(i8* %tmp9, i8* %tmp7, i64 %tmp2) #5
+  %cmp4.i.i = icmp eq i32 %call.i.i, 0
+  br label %__go_ptr_strings_equal.exit
+
+__go_ptr_strings_equal.exit:                      ; preds = %land.rhs.i.i, %if.end4.i, %entry
+  %retval.0.i = phi i1 [ %cmp1.mux.i, %entry ], [ false, %if.end4.i ], [ %cmp4.i.i, %land.rhs.i.i ]
+  ret i1 %retval.0.i
+}
+
+; Function Attrs: nounwind readonly
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) #5
+
+attributes #5 = { nounwind readonly ssp uwtable "split-stack" }
-- 
cgit v1.2.3