31 files changed, 797 insertions, 42 deletions
diff --git a/test/CodeGen/SystemZ/alloca-03.ll b/test/CodeGen/SystemZ/alloca-03.ll
new file mode 100644
index 000000000000..ece1198ad62f
--- /dev/null
+++ b/test/CodeGen/SystemZ/alloca-03.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Allocate 8 bytes, no need to align stack.
+define void @f0() {
+; CHECK-LABEL: f0:
+; CHECK: aghi %r15, -168
+; CHECK-NOT: nil
+; CHECK: mvghi 160(%r15), 10
+; CHECK: aghi %r15, 168
+  %x = alloca i64
+  store volatile i64 10, i64* %x
+  ret void
+}
+
+; Allocate %len * 8, no need to align stack.
+define void @f1(i64 %len) {
+; CHECK-LABEL: f1:
+; CHECK: sllg    %r0, %r2, 3
+; CHECK: lgr     %r1, %r15
+; CHECK: sgr     %r1, %r0
+; CHECK-NOT: ngr
+; CHECK: lgr     %r15, %r1
+; CHECK: la      %r1, 160(%r1)
+; CHECK: mvghi   0(%r1), 10
+  %x = alloca i64, i64 %len
+  store volatile i64 10, i64* %x
+  ret void
+}
+
+; Static alloca, align 128.
+define void @f2() {
+; CHECK-LABEL: f2:
+; CHECK: aghi    %r1, -128
+; CHECK: lgr     %r15, %r1
+; CHECK: la      %r1, 280(%r1)
+; CHECK: nill	 %r1, 65408
+; CHECK: mvghi   0(%r1), 10
+  %x = alloca i64, i64 1, align 128
+  store volatile i64 10, i64* %x, align 128
+  ret void
+}
+
+; Dynamic alloca, align 128.
+define void @f3(i64 %len) {
+; CHECK-LABEL: f3:
+; CHECK: sllg	%r1, %r2, 3
+; CHECK: la	%r0, 120(%r1)
+; CHECK: lgr	%r1, %r15
+; CHECK: sgr	%r1, %r0
+; CHECK: lgr	%r15, %r1
+; CHECK: la	%r1, 280(%r1)
+; CHECK: nill	%r1, 65408
+; CHECK: mvghi	0(%r1), 10
+  %x = alloca i64, i64 %len, align 128
+  store volatile i64 10, i64* %x, align 128
+  ret void
+}
+
+; Static alloca without explicit alignment - part of the frame.
+define void @f4() {
+; CHECK-LABEL: f4:
+; CHECK: aghi    %r15, -168
+; CHECK: mvhi    164(%r15), 10
+; CHECK: aghi    %r15, 168
+  %x = alloca i32
+  store volatile i32 10, i32* %x
+  ret void
+}
+
+; Static alloca of one i32, aligned by 128.
+define void @f5() {
+; CHECK-LABEL: f5:
+
+; CHECK: lgr	%r1, %r15
+; CHECK: aghi	%r1, -128
+; CHECK: lgr	%r15, %r1
+; CHECK: la	%r1, 280(%r1)
+; CHECK: nill	%r1, 65408
+; CHECK: mvhi	0(%r1), 10
+  %x = alloca i32, i64 1, align 128
+  store volatile i32 10, i32* %x
+  ret void
+}
+
diff --git a/test/CodeGen/SystemZ/alloca-04.ll b/test/CodeGen/SystemZ/alloca-04.ll
new file mode 100644
index 000000000000..86c77493d3e9
--- /dev/null
+++ b/test/CodeGen/SystemZ/alloca-04.ll
@@ -0,0 +1,14 @@
+; Check the "no-realign-stack" function attribute. We should get a warning.
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -debug-only=codegen 2>&1 | \
+; RUN:   FileCheck %s
+; REQUIRES: asserts
+
+define void @f6() "no-realign-stack" {
+  %x = alloca i64, i64 1, align 128
+  store volatile i64 10, i64* %x, align 128
+  ret void
+}
+
+; CHECK: Warning: requested alignment 128 exceeds the stack alignment 8
+; CHECK-NOT: nill
diff --git a/test/CodeGen/SystemZ/args-01.ll b/test/CodeGen/SystemZ/args-01.ll
index 3105503eda53..113110faf341 100644
--- a/test/CodeGen/SystemZ/args-01.ll
+++ b/test/CodeGen/SystemZ/args-01.ll
@@ -30,12 +30,12 @@ define void @foo() {
 ;
 ; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
 ; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-1-LABEL: foo:
diff --git a/test/CodeGen/SystemZ/args-02.ll b/test/CodeGen/SystemZ/args-02.ll
index 8686df88e679..89b080e821bf 100644
--- a/test/CodeGen/SystemZ/args-02.ll
+++ b/test/CodeGen/SystemZ/args-02.ll
@@ -31,12 +31,12 @@ define void @foo() {
 ;
 ; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
 ; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-1-LABEL: foo:
diff --git a/test/CodeGen/SystemZ/args-03.ll b/test/CodeGen/SystemZ/args-03.ll
index d7d3ea105df7..a52782f4c183 100644
--- a/test/CodeGen/SystemZ/args-03.ll
+++ b/test/CodeGen/SystemZ/args-03.ll
@@ -31,12 +31,12 @@ define void @foo() {
 ;
 ; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
 ; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-1-LABEL: foo:
diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll
index 48a2cf491049..475cceb106e5 100644
--- a/test/CodeGen/SystemZ/args-04.ll
+++ b/test/CodeGen/SystemZ/args-04.ll
@@ -1,7 +1,7 @@
 ; Test incoming GPR, FPR and stack arguments when no extension type is given.
 ; This type of argument is used for passing structures, etc.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
 
 ; Do some arithmetic so that we can see the register being used.
 define i8 @f1(i8 %r2) {
diff --git a/test/CodeGen/SystemZ/args-07.ll b/test/CodeGen/SystemZ/args-07.ll
index 29d9b319ffc0..44a31fadd6d2 100644
--- a/test/CodeGen/SystemZ/args-07.ll
+++ b/test/CodeGen/SystemZ/args-07.ll
@@ -1,6 +1,6 @@
 ; Test multiple return values (LLVM ABI extension)
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs| FileCheck %s
 
 ; Up to four integer return values fit into GPRs.
 define { i64, i64, i64, i64 } @f1() {
diff --git a/test/CodeGen/SystemZ/asm-17.ll b/test/CodeGen/SystemZ/asm-17.ll
index 533b5e90d62d..acf2aff45429 100644
--- a/test/CodeGen/SystemZ/asm-17.ll
+++ b/test/CodeGen/SystemZ/asm-17.ll
@@ -1,6 +1,7 @@
 ; Test explicit register names.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
+; RUN: llc < %s  -verify-machineinstrs -mtriple=s390x-linux-gnu -no-integrated-as \
+; RUN:   | FileCheck %s
 
 ; Test i32 GPRs.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll
index 999984be88d4..7909253d188c 100644
--- a/test/CodeGen/SystemZ/asm-18.ll
+++ b/test/CodeGen/SystemZ/asm-18.ll
@@ -1,7 +1,8 @@
 ; Test high-word operations, using "h" constraints to force a high
 ; register and "r" constraints to force a low register.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -no-integrated-as | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN:   -no-integrated-as | FileCheck %s
 
 ; Test loads and stores involving mixtures of high and low registers.
 define void @f1(i32 *%ptr1, i32 *%ptr2) {
diff --git a/test/CodeGen/SystemZ/dag-combine-01.ll b/test/CodeGen/SystemZ/dag-combine-01.ll
new file mode 100644
index 000000000000..a56a118dadaa
--- /dev/null
+++ b/test/CodeGen/SystemZ/dag-combine-01.ll
@@ -0,0 +1,97 @@
+; Test that, during DAG combining, MergeConsecutiveStores() does not
+; incorrectly drop a chain dependency to a store that was previously
+; chained to one of two combined loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+@A = common global [2048 x float] zeroinitializer, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main(i32 signext %argc, i8** nocapture readnone %argv) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv24 = phi i64 [ 0, %entry ], [ %indvars.iv.next25, %for.body ]
+  %sum.018 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %0 = trunc i64 %indvars.iv24 to i32
+  %conv = sitofp i32 %0 to float
+  %arrayidx = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %indvars.iv24
+  store float %conv, float* %arrayidx, align 4
+  %add = fadd float %sum.018, %conv
+  %indvars.iv.next25 = add nuw nsw i64 %indvars.iv24, 1
+  %exitcond26 = icmp eq i64 %indvars.iv.next25, 2048
+  br i1 %exitcond26, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  br label %for.body.3.lr.ph.i.preheader
+
+for.body.3.lr.ph.i.preheader:                     ; preds = %complex_transpose.exit, %for.end
+  %i.116 = phi i32 [ 0, %for.end ], [ %inc9, %complex_transpose.exit ]
+  br label %for.body.3.lr.ph.i
+
+for.body.3.lr.ph.i:                               ; preds = %for.body.3.lr.ph.i.preheader, %for.inc.40.i
+  %indvars.iv19 = phi i32 [ 1, %for.body.3.lr.ph.i.preheader ], [ %indvars.iv.next20, %for.inc.40.i ]
+  %indvars.iv57.i = phi i64 [ 1, %for.body.3.lr.ph.i.preheader ], [ %indvars.iv.next58.i, %for.inc.40.i ]
+  %1 = shl nsw i64 %indvars.iv57.i, 1
+  %2 = shl nsw i64 %indvars.iv57.i, 6
+  br label %for.body.3.i
+
+for.body.3.i:                                     ; preds = %for.body.3.i, %for.body.3.lr.ph.i
+; CHECK-LABEL: .LBB0_5:
+; CHECK-NOT:    stfh    %r{{.*}}, 0(%r{{.*}})
+; CHECK:        lg      %r{{.*}}, -4(%r{{.*}})
+; Overlapping load should go before the store
+  %indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ]
+  %3 = shl nsw i64 %indvars.iv.i, 6
+  %4 = add nuw nsw i64 %3, %1
+  %arrayidx.i = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %4
+  %5 = bitcast float* %arrayidx.i to i32*
+  %6 = load i32, i32* %5, align 4
+  %arrayidx9.i = getelementptr inbounds float, float* getelementptr inbounds ([2048 x float], [2048 x float]* @A, i64 0, i64 1), i64 %4
+  %7 = bitcast float* %arrayidx9.i to i32*
+  %8 = load i32, i32* %7, align 4
+  %9 = shl nsw i64 %indvars.iv.i, 1
+  %10 = add nuw nsw i64 %9, %2
+  %arrayidx14.i = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %10
+  %11 = bitcast float* %arrayidx14.i to i32*
+  %12 = load i32, i32* %11, align 4
+  %arrayidx19.i = getelementptr inbounds float, float* getelementptr inbounds ([2048 x float], [2048 x float]* @A, i64 0, i64 1), i64 %10
+  %13 = bitcast float* %arrayidx19.i to i32*
+  %14 = load i32, i32* %13, align 4
+  store i32 %6, i32* %11, align 4
+  store i32 %8, i32* %13, align 4
+  store i32 %12, i32* %5, align 4
+  store i32 %14, i32* %7, align 4
+  %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
+  %exitcond21 = icmp eq i32 %lftr.wideiv, %indvars.iv19
+  br i1 %exitcond21, label %for.inc.40.i, label %for.body.3.i
+
+for.inc.40.i:                                     ; preds = %for.body.3.i
+  %indvars.iv.next58.i = add nuw nsw i64 %indvars.iv57.i, 1
+  %indvars.iv.next20 = add nuw nsw i32 %indvars.iv19, 1
+  %exitcond22 = icmp eq i64 %indvars.iv.next58.i, 32
+  br i1 %exitcond22, label %complex_transpose.exit, label %for.body.3.lr.ph.i
+
+complex_transpose.exit:                           ; preds = %for.inc.40.i
+  %inc9 = add nuw nsw i32 %i.116, 1
+  %exitcond23 = icmp eq i32 %inc9, 10
+  br i1 %exitcond23, label %for.body.14.preheader, label %for.body.3.lr.ph.i.preheader
+
+for.body.14.preheader:                            ; preds = %complex_transpose.exit
+  br label %for.body.14
+
+for.body.14:                                      ; preds = %for.body.14.preheader, %for.body.14
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body.14 ], [ 0, %for.body.14.preheader ]
+  %sum.115 = phi float [ %add17, %for.body.14 ], [ 0.000000e+00, %for.body.14.preheader ]
+  %arrayidx16 = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %indvars.iv
+  %15 = load float, float* %arrayidx16, align 4
+  %add17 = fadd float %sum.115, %15
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 2048
+  br i1 %exitcond, label %for.end.20, label %for.body.14
+
+for.end.20:                                       ; preds = %for.body.14
+  ret i32 0
+}
diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll
index 3b143d93315b..3bb3ede457f3 100644
--- a/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -7,7 +7,7 @@
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
-; CHECK: lpebr %f0, %f0
+; CHECK: lpdfr %f0, %f0
 ; CHECK: br %r14
   %res = call float @llvm.fabs.f32(float %f)
   ret float %res
@@ -17,7 +17,7 @@ define float @f1(float %f) {
 declare double @llvm.fabs.f64(double %f)
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: lpdbr %f0, %f0
+; CHECK: lpdfr %f0, %f0
 ; CHECK: br %r14
   %res = call double @llvm.fabs.f64(double %f)
   ret double %res
diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll
index e831ddb86fea..b2d2cfd52b6a 100644
--- a/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -7,7 +7,7 @@
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
-; CHECK: lnebr %f0, %f0
+; CHECK: lndfr %f0, %f0
 ; CHECK: br %r14
   %abs = call float @llvm.fabs.f32(float %f)
   %res = fsub float -0.0, %abs
@@ -18,7 +18,7 @@ define float @f1(float %f) {
 declare double @llvm.fabs.f64(double %f)
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: lndbr %f0, %f0
+; CHECK: lndfr %f0, %f0
 ; CHECK: br %r14
   %abs = call double @llvm.fabs.f64(double %f)
   %res = fsub double -0.0, %abs
diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll
index 5be1ad79d453..4f98742197bd 100644
--- a/test/CodeGen/SystemZ/fp-add-02.ll
+++ b/test/CodeGen/SystemZ/fp-add-02.ll
@@ -2,7 +2,7 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
 ; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
 declare double @foo()
 
 ; Check register addition.
diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll
index 94a256777c75..0808ddd8db48 100644
--- a/test/CodeGen/SystemZ/fp-cmp-02.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-02.ll
@@ -3,7 +3,7 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
 ; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs\
 ; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 declare double @foo()
@@ -164,8 +164,7 @@ define i64 @f8(i64 %a, i64 %b, double %f) {
 ; CHECK-SCALAR: ltdbr %f0, %f0
 ; CHECK-SCALAR-NEXT: je
 ; CHECK-SCALAR: lgr %r2, %r3
-; CHECK-VECTOR: lzdr %f1
-; CHECK-VECTOR-NEXT: cdbr %f0, %f1
+; CHECK-VECTOR: ltdbr %f0, %f0
 ; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %cond = fcmp oeq double %f, 0.0
diff --git a/test/CodeGen/SystemZ/fp-cmp-05.ll b/test/CodeGen/SystemZ/fp-cmp-05.ll
new file mode 100644
index 000000000000..c8eb18c6e6ba
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-cmp-05.ll
@@ -0,0 +1,80 @@
+; Test that floating-point instructions that set cc are used to
+; eliminate compares for load complement, load negative and load
+; positive.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Load complement (sign-bit flipped).
+; Test f32
+define float @f1(float %a, float %b, float %f) {
+; CHECK-LABEL: f1:
+; CHECK: lcebr
+; CHECK-NEXT: je
+  %neg = fsub float -0.0, %f
+  %cond = fcmp oeq float %neg, 0.0
+  %res = select i1 %cond, float %a, float %b
+  ret float %res
+}
+
+; Test f64
+define double @f2(double %a, double %b, double %f) {
+; CHECK-LABEL: f2:
+; CHECK: lcdbr
+; CHECK-NEXT: je
+  %neg = fsub double -0.0, %f
+  %cond = fcmp oeq double %neg, 0.0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Negation of floating-point absolute.
+; Test f32
+declare float @llvm.fabs.f32(float %f)
+define float @f3(float %a, float %b, float %f) {
+; CHECK-LABEL: f3:
+; CHECK: lnebr
+; CHECK-NEXT: je
+  %abs = call float @llvm.fabs.f32(float %f)
+  %neg = fsub float -0.0, %abs
+  %cond = fcmp oeq float %neg, 0.0
+  %res = select i1 %cond, float %a, float %b
+  ret float %res
+}
+
+; Test f64
+declare double @llvm.fabs.f64(double %f)
+define double @f4(double %a, double %b, double %f) {
+; CHECK-LABEL: f4:
+; CHECK: lndbr
+; CHECK-NEXT: je
+  %abs = call double @llvm.fabs.f64(double %f)
+  %neg = fsub double -0.0, %abs
+  %cond = fcmp oeq double %neg, 0.0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Absolute floating-point value.
+; Test f32
+define float @f5(float %a, float %b, float %f) {
+; CHECK-LABEL: f5:
+; CHECK: lpebr
+; CHECK-NEXT: je
+  %abs = call float @llvm.fabs.f32(float %f)
+  %cond = fcmp oeq float %abs, 0.0
+  %res = select i1 %cond, float %a, float %b
+  ret float %res
+}
+
+; Test f64
+define double @f6(double %a, double %b, double %f) {
+; CHECK-LABEL: f6:
+; CHECK: lpdbr
+; CHECK-NEXT: je
+  %abs = call double @llvm.fabs.f64(double %f)
+  %cond = fcmp oeq double %abs, 0.0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
diff --git a/test/CodeGen/SystemZ/fp-const-02.ll b/test/CodeGen/SystemZ/fp-const-02.ll
index 96f857895ecf..942465c06600 100644
--- a/test/CodeGen/SystemZ/fp-const-02.ll
+++ b/test/CodeGen/SystemZ/fp-const-02.ll
@@ -6,7 +6,7 @@
 define float @f1() {
 ; CHECK-LABEL: f1:
 ; CHECK: lzer [[REGISTER:%f[0-5]+]]
-; CHECK: lcebr %f0, [[REGISTER]]
+; CHECK: lcdfr %f0, [[REGISTER]]
 ; CHECK: br %r14
   ret float -0.0
 }
@@ -15,7 +15,7 @@ define float @f1() {
 define double @f2() {
 ; CHECK-LABEL: f2:
 ; CHECK: lzdr [[REGISTER:%f[0-5]+]]
-; CHECK: lcdbr %f0, [[REGISTER]]
+; CHECK: lcdfr %f0, [[REGISTER]]
 ; CHECK: br %r14
   ret double -0.0
 }
diff --git a/test/CodeGen/SystemZ/fp-libcall.ll b/test/CodeGen/SystemZ/fp-libcall.ll
new file mode 100644
index 000000000000..75250b811cba
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-libcall.ll
@@ -0,0 +1,273 @@
+; Test that library calls are emitted for LLVM IR intrinsics
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define float @f1(float %x, i32 %y) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, __powisf2@PLT
+  %tmp = call float @llvm.powi.f32(float %x, i32 %y)
+  ret float %tmp
+}
+
+define double @f2(double %x, i32 %y) {
+; CHECK-LABEL: f2:
+; CHECK: brasl %r14, __powidf2@PLT
+  %tmp = call double @llvm.powi.f64(double %x, i32 %y)
+  ret double %tmp
+}
+
+define fp128 @f3(fp128 %x, i32 %y) {
+; CHECK-LABEL: f3:
+; CHECK: brasl %r14, __powitf2@PLT
+  %tmp = call fp128 @llvm.powi.f128(fp128 %x, i32 %y)
+  ret fp128 %tmp
+}
+
+define float @f4(float %x, float %y) {
+; CHECK-LABEL: f4:
+; CHECK: brasl %r14, powf@PLT
+  %tmp = call float @llvm.pow.f32(float %x, float %y)
+  ret float %tmp
+}
+
+define double @f5(double %x, double %y) {
+; CHECK-LABEL: f5:
+; CHECK: brasl %r14, pow@PLT
+  %tmp = call double @llvm.pow.f64(double %x, double %y)
+  ret double %tmp
+}
+
+define fp128 @f6(fp128 %x, fp128 %y) {
+; CHECK-LABEL: f6:
+; CHECK: brasl %r14, powl@PLT
+  %tmp = call fp128 @llvm.pow.f128(fp128 %x, fp128 %y)
+  ret fp128 %tmp
+}
+
+define float @f7(float %x) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, sinf@PLT
+  %tmp = call float @llvm.sin.f32(float %x)
+  ret float %tmp
+}
+
+define double @f8(double %x) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, sin@PLT
+  %tmp = call double @llvm.sin.f64(double %x)
+  ret double %tmp
+}
+
+define fp128 @f9(fp128 %x) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, sinl@PLT
+  %tmp = call fp128 @llvm.sin.f128(fp128 %x)
+  ret fp128 %tmp
+}
+
+define float @f10(float %x) {
+; CHECK-LABEL: f10:
+; CHECK: brasl %r14, cosf@PLT
+  %tmp = call float @llvm.cos.f32(float %x)
+  ret float %tmp
+}
+
+define double @f11(double %x) {
+; CHECK-LABEL: f11:
+; CHECK: brasl %r14, cos@PLT
+  %tmp = call double @llvm.cos.f64(double %x)
+  ret double %tmp
+}
+
+define fp128 @f12(fp128 %x) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, cosl@PLT
+  %tmp = call fp128 @llvm.cos.f128(fp128 %x)
+  ret fp128 %tmp
+}
+
+define float @f13(float %x) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, expf@PLT
+  %tmp = call float @llvm.exp.f32(float %x)
+  ret float %tmp
+}
+
+define double @f14(double %x) {
+; CHECK-LABEL: f14:
+; CHECK: brasl %r14, exp@PLT
+  %tmp = call double @llvm.exp.f64(double %x)
+  ret double %tmp
+}
+
+define fp128 @f15(fp128 %x) {
+; CHECK-LABEL: f15:
+; CHECK: brasl %r14, expl@PLT
+  %tmp = call fp128 @llvm.exp.f128(fp128 %x)
+  ret fp128 %tmp
+}
+
+define float @f16(float %x) {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, exp2f@PLT
+  %tmp = call float @llvm.exp2.f32(float %x)
+  ret float %tmp
+}
+
+define double @f17(double %x) {
+; CHECK-LABEL: f17:
+; CHECK: brasl %r14, exp2@PLT
+  %tmp = call double @llvm.exp2.f64(double %x)
+  ret double %tmp
+}
+
+define fp128 @f18(fp128 %x) {
+; CHECK-LABEL: f18:
+; CHECK: brasl %r14, exp2l@PLT
+  %tmp = call fp128 @llvm.exp2.f128(fp128 %x)
+  ret fp128 %tmp
+}
+
+define float @f19(float %x) {
+; CHECK-LABEL: f19:
+; CHECK: brasl %r14, logf@PLT
+  %tmp = call float @llvm.log.f32(float %x)
+  ret float %tmp
+}
+
+define double @f20(double %x) {
+; CHECK-LABEL: f20:
+; CHECK: brasl %r14, log@PLT
+  %tmp = call double @llvm.log.f64(double %x)
+  ret double %tmp
+}
+
+define fp128 @f21(fp128 %x) {
+; CHECK-LABEL: f21:
+; CHECK: brasl %r14, logl@PLT
+  %tmp = call fp128 @llvm.log.f128(fp128 %x)
+  ret fp128 %tmp
+}
+
+define float @f22(float %x) {
+; CHECK-LABEL: f22:
+; CHECK: brasl %r14, log2f@PLT
+  %tmp = call float @llvm.log2.f32(float %x)
+  ret float %tmp
+}
+
+define double @f23(double %x) {
+; CHECK-LABEL: f23:
+; CHECK: brasl %r14, log2@PLT
+  %tmp = call double @llvm.log2.f64(double %x)
+  ret double %tmp
+}
+
+define fp128 @f24(fp128 %x) {
+; CHECK-LABEL: f24:
+; CHECK: brasl %r14, log2l@PLT
+  %tmp = call fp128 @llvm.log2.f128(fp128 %x)
+  ret fp128 %tmp
+}
+
+define float @f25(float %x) {
+; CHECK-LABEL: f25:
+; CHECK: brasl %r14, log10f@PLT
+  %tmp = call float @llvm.log10.f32(float %x)
+  ret float %tmp
+}
+
+define double @f26(double %x) {
+; CHECK-LABEL: f26:
+; CHECK: brasl %r14, log10@PLT
+  %tmp = call double @llvm.log10.f64(double %x)
+  ret double %tmp
+}
+
+define fp128 @f27(fp128 %x) {
+; CHECK-LABEL: f27:
+; CHECK: brasl %r14, log10l@PLT
+  %tmp = call fp128 @llvm.log10.f128(fp128 %x)
+  ret fp128 %tmp
+}
+
+define float @f28(float %x, float %y) {
+; CHECK-LABEL: f28:
+; CHECK: brasl %r14, fminf@PLT
+  %tmp = call float @llvm.minnum.f32(float %x, float %y)
+  ret float %tmp
+}
+
+define double @f29(double %x, double %y) {
+; CHECK-LABEL: f29:
+; CHECK: brasl %r14, fmin@PLT
+  %tmp = call double @llvm.minnum.f64(double %x, double %y)
+  ret double %tmp
+}
+
+define fp128 @f30(fp128 %x, fp128 %y) {
+; CHECK-LABEL: f30:
+; CHECK: brasl %r14, fminl@PLT
+  %tmp = call fp128 @llvm.minnum.f128(fp128 %x, fp128 %y)
+  ret fp128 %tmp
+}
+
+define float @f31(float %x, float %y) {
+; CHECK-LABEL: f31:
+; CHECK: brasl %r14, fmaxf@PLT
+  %tmp = call float @llvm.maxnum.f32(float %x, float %y)
+  ret float %tmp
+}
+
+define double @f32(double %x, double %y) {
+; CHECK-LABEL: f32:
+; CHECK: brasl %r14, fmax@PLT
+  %tmp = call double @llvm.maxnum.f64(double %x, double %y)
+  ret double %tmp
+}
+
+define fp128 @f33(fp128 %x, fp128 %y) {
+; CHECK-LABEL: f33:
+; CHECK: brasl %r14, fmaxl@PLT
+  %tmp = call fp128 @llvm.maxnum.f128(fp128 %x, fp128 %y)
+  ret fp128 %tmp
+}
+
+declare float @llvm.powi.f32(float, i32)
+declare double @llvm.powi.f64(double, i32)
+declare fp128 @llvm.powi.f128(fp128, i32)
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
+declare fp128 @llvm.pow.f128(fp128, fp128)
+
+declare float @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare fp128 @llvm.sin.f128(fp128)
+declare float @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
+declare fp128 @llvm.cos.f128(fp128)
+
+declare float @llvm.exp.f32(float)
+declare double @llvm.exp.f64(double)
+declare fp128 @llvm.exp.f128(fp128)
+declare float @llvm.exp2.f32(float)
+declare double @llvm.exp2.f64(double)
+declare fp128 @llvm.exp2.f128(fp128)
+
+declare float @llvm.log.f32(float)
+declare double @llvm.log.f64(double)
+declare fp128 @llvm.log.f128(fp128)
+declare float @llvm.log2.f32(float)
+declare double @llvm.log2.f64(double)
+declare fp128 @llvm.log2.f128(fp128)
+declare float @llvm.log10.f32(float)
+declare double @llvm.log10.f64(double)
+declare fp128 @llvm.log10.f128(fp128)
+
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
+declare fp128 @llvm.minnum.f128(fp128, fp128)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
+declare fp128 @llvm.maxnum.f128(fp128, fp128)
+
diff --git a/test/CodeGen/SystemZ/fp-move-05.ll b/test/CodeGen/SystemZ/fp-move-05.ll
index da12af6d68c1..0864deee5137 100644
--- a/test/CodeGen/SystemZ/fp-move-05.ll
+++ b/test/CodeGen/SystemZ/fp-move-05.ll
@@ -1,6 +1,6 @@
 ; Test 128-bit floating-point loads.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
 
 ; Check loads with no offset.
 define double @f1(i64 %src) {
diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll
index fe2e5f67cf5b..b9810f9f34d3 100644
--- a/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -6,7 +6,7 @@
 ; Test f32.
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
-; CHECK: lcebr %f0, %f0
+; CHECK: lcdfr %f0, %f0
 ; CHECK: br %r14
   %res = fsub float -0.0, %f
   ret float %res
@@ -15,7 +15,7 @@ define float @f1(float %f) {
 ; Test f64.
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: lcdbr %f0, %f0
+; CHECK: lcdfr %f0, %f0
 ; CHECK: br %r14
   %res = fsub double -0.0, %f
   ret double %res
diff --git a/test/CodeGen/SystemZ/fp-sincos-01.ll b/test/CodeGen/SystemZ/fp-sincos-01.ll
new file mode 100644
index 000000000000..cd182a590eee
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sincos-01.ll
@@ -0,0 +1,56 @@
+; Test that combined sin/cos library call is emitted when appropriate
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=CHECK-NOOPT
+; RUN: llc < %s -mtriple=s390x-linux-gnu -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-OPT
+
+define float @f1(float %x) {
+; CHECK-OPT-LABEL: f1:
+; CHECK-OPT: brasl %r14, sincosf@PLT
+; CHECK-OPT: le %f0, 164(%r15)
+; CHECK-OPT: aeb %f0, 160(%r15)
+
+; CHECK-NOOPT-LABEL: f1:
+; CHECK-NOOPT: brasl %r14, sinf@PLT
+; CHECK-NOOPT: brasl %r14, cosf@PLT
+  %tmp1 = call float @sinf(float %x)
+  %tmp2 = call float @cosf(float %x)
+  %add = fadd float %tmp1, %tmp2
+  ret float %add
+}
+
+define double @f2(double %x) {
+; CHECK-OPT-LABEL: f2:
+; CHECK-OPT: brasl %r14, sincos@PLT
+; CHECK-OPT: ld %f0, 168(%r15)
+; CHECK-OPT: adb %f0, 160(%r15)
+
+; CHECK-NOOPT-LABEL: f2:
+; CHECK-NOOPT: brasl %r14, sin@PLT
+; CHECK-NOOPT: brasl %r14, cos@PLT
+  %tmp1 = call double @sin(double %x)
+  %tmp2 = call double @cos(double %x)
+  %add = fadd double %tmp1, %tmp2
+  ret double %add
+}
+
+define fp128 @f3(fp128 %x) {
+; CHECK-OPT-LABEL: f3:
+; CHECK-OPT: brasl %r14, sincosl@PLT
+; CHECK-OPT: axbr
+
+; CHECK-NOOPT-LABEL: f3:
+; CHECK-NOOPT: brasl %r14, sinl@PLT
+; CHECK-NOOPT: brasl %r14, cosl@PLT
+  %tmp1 = call fp128 @sinl(fp128 %x)
+  %tmp2 = call fp128 @cosl(fp128 %x)
+  %add = fadd fp128 %tmp1, %tmp2
+  ret fp128 %add
+}
+
+declare float @sinf(float) readonly
+declare double @sin(double) readonly
+declare fp128 @sinl(fp128) readonly
+declare float @cosf(float) readonly
+declare double @cos(double) readonly
+declare fp128 @cosl(fp128) readonly
+
diff --git a/test/CodeGen/SystemZ/insert-05.ll b/test/CodeGen/SystemZ/insert-05.ll
index b76859a568f3..1ea8a64e28e3 100644
--- a/test/CodeGen/SystemZ/insert-05.ll
+++ b/test/CodeGen/SystemZ/insert-05.ll
@@ -214,8 +214,8 @@ define i64 @f18(i32 %a) {
 ; The truncation here isn't free; we need an explicit zero extension.
 define i64 @f19(i32 %a) {
 ; CHECK-LABEL: f19:
-; CHECK: llgcr %r2, %r2
-; CHECK: oihl %r2, 1
+; CHECK: llcr %r2, %r2
+; CHECK: iihf %r2, 1
 ; CHECK: br %r14
   %trunc = trunc i32 %a to i8
   %ext = zext i8 %trunc to i64
diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll
index 97d48521254d..a87dccd4ac2a 100644
--- a/test/CodeGen/SystemZ/int-cmp-44.ll
+++ b/test/CodeGen/SystemZ/int-cmp-44.ll
@@ -1,7 +1,8 @@
 ; Test that compares are omitted if CC already has the right value
 ; (z10 version).
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as \
+; RUN:   -verify-machineinstrs| FileCheck %s
 
 declare void @foo()
 
diff --git a/test/CodeGen/SystemZ/int-cmp-51.ll b/test/CodeGen/SystemZ/int-cmp-51.ll
new file mode 100644
index 000000000000..85a0e4b4d3a7
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-51.ll
@@ -0,0 +1,34 @@
+; Check that modelling of CC/CCRegs does not stop MachineCSE from
+; removing a compare.  MachineCSE will not extend a live range of an
+; allocatable or reserved phys reg.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @bar(i8)
+
+; %and188 is compared twice (ult 2, then eq 1); no cijlh against 1 is expected in BB#1.
+define void @f1(i32 %lhs) {
+; CHECK-LABEL: BB#1:
+; CHECK-NOT: cijlh %r0, 1, .LBB0_3
+
+entry:
+  %and188 = and i32 %lhs, 255             ; keep only the low 8 bits of %lhs
+  %cmp189 = icmp ult i32 %and188, 2       ; first compare of %and188
+  br i1 %cmp189, label %if.then.191, label %if.else.201
+
+if.then.191:
+  %cmp194 = icmp eq i32 %and188, 1        ; second compare of the same value
+  br i1 %cmp194, label %if.then.196, label %if.else.198
+
+if.then.196:
+  call void @bar(i8 1);
+  br label %if.else.201
+
+if.else.198:
+  call void @bar(i8 0);
+  br label %if.else.201
+
+if.else.201:
+  ret void
+}
+
diff --git a/test/CodeGen/SystemZ/int-cmp-52.ll b/test/CodeGen/SystemZ/int-cmp-52.ll
new file mode 100644
index 000000000000..a0b72371d1c5
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-52.ll
@@ -0,0 +1,24 @@
+; This used to crash the backend due to a failed assertion.
+; No particular output expected, but must compile.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu
+
+define void @test(i16 *%input, i32 *%result) {
+entry:
+  %0 = load i16, i16* %input, align 2
+  %1 = zext i16 %0 to i32                 ; zero-extended, so %1 is never negative
+  %2 = icmp slt i32 %1, 0                 ; signed compare of the zero-extended value against 0
+  br i1 %2, label %if.then, label %if.else
+
+if.then:
+  store i32 1, i32* %result, align 4      ; compare true: *%result = 1
+  br label %return
+
+if.else:
+  store i32 0, i32* %result, align 4      ; compare false: *%result = 0
+  br label %return
+
+return:
+  ret void
+}
+
diff --git a/test/CodeGen/SystemZ/memchr-01.ll b/test/CodeGen/SystemZ/memchr-01.ll
index c51690b9848d..f7509c4f256b 100644
--- a/test/CodeGen/SystemZ/memchr-01.ll
+++ b/test/CodeGen/SystemZ/memchr-01.ll
@@ -1,6 +1,6 @@
 ; Test memchr using SRST, with a weird but usable prototype.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
 
 declare i8 *@memchr(i8 *%src, i16 %char, i32 %len)
 
diff --git a/test/CodeGen/SystemZ/spill-01.ll b/test/CodeGen/SystemZ/spill-01.ll
index a59c06f192b6..9be4420fd839 100644
--- a/test/CodeGen/SystemZ/spill-01.ll
+++ b/test/CodeGen/SystemZ/spill-01.ll
@@ -1,7 +1,7 @@
 ; Test spilling using MVC.  The tests here assume z10 register pressure,
 ; without the high words being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -verify-machineinstrs | FileCheck %s
 
 declare void @foo()
 
diff --git a/test/CodeGen/SystemZ/vec-args-04.ll b/test/CodeGen/SystemZ/vec-args-04.ll
index 3a25404934e2..5176d80f08fb 100644
--- a/test/CodeGen/SystemZ/vec-args-04.ll
+++ b/test/CodeGen/SystemZ/vec-args-04.ll
@@ -21,17 +21,25 @@ define void @foo() {
 ; CHECK-VEC-DAG: vrepib %v31, 8
 ; CHECK-VEC: brasl %r14, bar@PLT
 ;
+
+; CHECK-STACK: .LCPI0_0:
+; CHECK-STACK:	.quad	795741901033570304      # 0xb0b0b0b00000000
+; CHECK-STACK:	.quad	868082074056920076      # 0xc0c0c0c0c0c0c0c
+; CHECK-STACK: .LCPI0_1:
+; CHECK-STACK:	.quad	648518346341351424      # 0x900000000000000
+; CHECK-STACK:	.quad	723390690146385920      # 0xa0a000000000000
+
 ; CHECK-STACK-LABEL: foo:
 ; CHECK-STACK: aghi %r15, -192
-; CHECK-STACK-DAG: llihh [[REG1:%r[0-9]+]], 2304
-; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
-; CHECK-STACK-DAG: llihh [[REG2:%r[0-9]+]], 2570
-; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
-; CHECK-STACK-DAG: llihf [[REG3:%r[0-9]+]], 185273099
-; CHECK-STACK-DAG: stg [[REG3]], 176(%r15)
-; CHECK-STACK-DAG: llihf [[REG4:%r[0-9]+]], 202116108
-; CHECK-STACK-DAG: oilf [[REG4]], 202116108
-; CHECK-STACK-DAG: stg [[REG4]], 176(%r15)
+
+; CHECK-STACK-DAG: larl [[REG1:%r[0-9]+]], .LCPI0_0
+; CHECK-STACK-DAG: vl [[VREG0:%v[0-9]+]], 0([[REG1]])
+; CHECK-STACK-DAG: vst [[VREG0]], 176(%r15)
+
+; CHECK-STACK-DAG: larl [[REG2:%r[0-9]+]], .LCPI0_1
+; CHECK-STACK-DAG: vl [[VREG1:%v[0-9]+]], 0([[REG2]])
+; CHECK-STACK-DAG: vst [[VREG1]], 160(%r15)
+
 ; CHECK-STACK: brasl %r14, bar@PLT
 
   call void @bar (<1 x i8> <i8 1>,
diff --git a/test/CodeGen/SystemZ/vec-args-05.ll b/test/CodeGen/SystemZ/vec-args-05.ll
index cd1448b8611e..8c5ff8414292 100644
--- a/test/CodeGen/SystemZ/vec-args-05.ll
+++ b/test/CodeGen/SystemZ/vec-args-05.ll
@@ -14,12 +14,14 @@ define void @foo() {
 ; CHECK-VEC-DAG: vrepib %v26, 2
 ; CHECK-VEC: brasl %r14, bar@PLT
 ;
+; CHECK-STACK: .LCPI0_0:
+; CHECK-STACK: .quad	217020518463700992      # 0x303030300000000
+; CHECK-STACK: .quad	289360691284934656      # 0x404040400000000
 ; CHECK-STACK-LABEL: foo:
 ; CHECK-STACK: aghi %r15, -176
-; CHECK-STACK-DAG: llihf [[REG1:%r[0-9]+]], 50529027
-; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
-; CHECK-STACK-DAG: llihf [[REG2:%r[0-9]+]], 67372036
-; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
+; CHECK-STACK-DAG: larl [[REG1:%r[0-9]+]], .LCPI0_0
+; CHECK-STACK-DAG: vl [[VREG:%v[0-9]+]], 0([[REG1]])
+; CHECK-STACK-DAG: vst [[VREG]], 160(%r15)
 ; CHECK-STACK: brasl %r14, bar@PLT
 
   call void (<4 x i8>, <4 x i8>, ...) @bar
diff --git a/test/CodeGen/SystemZ/vec-perm-12.ll b/test/CodeGen/SystemZ/vec-perm-12.ll
new file mode 100644
index 000000000000..b70b13d90682
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-12.ll
@@ -0,0 +1,43 @@
+; Test inserting a truncated value into a vector element
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN:   FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <4 x i32> @f1(<4 x i32> %x, i64 %y) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: vlvgf [[ELT:%v[0-9]+]], %r2, 0
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vperm %v24, %v24, [[ELT]], [[MASK]]
+; CHECK-CODE: br %r14
+; Expected mask bytes: 12-15, 8-11, 4-7, then 16-19 (presumably the vlvgf result; confirm against VPERM byte numbering).
+; CHECK-VECTOR: .byte 12
+; CHECK-VECTOR-NEXT: .byte 13
+; CHECK-VECTOR-NEXT: .byte 14
+; CHECK-VECTOR-NEXT: .byte 15
+; CHECK-VECTOR-NEXT: .byte 8
+; CHECK-VECTOR-NEXT: .byte 9
+; CHECK-VECTOR-NEXT: .byte 10
+; CHECK-VECTOR-NEXT: .byte 11
+; CHECK-VECTOR-NEXT: .byte 4
+; CHECK-VECTOR-NEXT: .byte 5
+; CHECK-VECTOR-NEXT: .byte 6
+; CHECK-VECTOR-NEXT: .byte 7
+; CHECK-VECTOR-NEXT: .byte 16
+; CHECK-VECTOR-NEXT: .byte 17
+; CHECK-VECTOR-NEXT: .byte 18
+; CHECK-VECTOR-NEXT: .byte 19
+; Build the result <x[3], x[2], x[1], trunc(y)> one element at a time.
+  %elt0 = extractelement <4 x i32> %x, i32 3
+  %elt1 = extractelement <4 x i32> %x, i32 2
+  %elt2 = extractelement <4 x i32> %x, i32 1
+  %elt3 = trunc i64 %y to i32           ; the truncated value being inserted
+  %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
+  %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
+  %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
+  %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3
+  ret <4 x i32> %vec3
+}
+
diff --git a/test/CodeGen/SystemZ/vec-perm-13.ll b/test/CodeGen/SystemZ/vec-perm-13.ll
new file mode 100644
index 000000000000..708d8de53f86
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-13.ll
@@ -0,0 +1,38 @@
+; Test vector shuffles on vectors with implicitly extended elements
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN:   FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <4 x i16> @f1(<4 x i16> %x) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vgbm [[ELT:%v[0-9]+]], 0
+; CHECK-CODE: vperm %v24, %v24, [[ELT]], [[MASK]]
+; CHECK-CODE: br %r14
+; Expected mask: only bytes 4-7 carry indices (6, 7, 16, 17); every other byte is emitted as ".space 1".
+; CHECK-VECTOR: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .byte   6
+; CHECK-VECTOR-NEXT: .byte   7
+; CHECK-VECTOR-NEXT: .byte   16
+; CHECK-VECTOR-NEXT: .byte   17
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; CHECK-VECTOR-NEXT: .space  1
+; Element 3 of %x moves to element 2 and a zero constant goes to element 3; elements 0 and 1 stay undef.
+  %elt = extractelement <4 x i16> %x, i32 3
+  %vec1 = insertelement <4 x i16> undef, i16 %elt, i32 2
+  %vec2 = insertelement <4 x i16> %vec1, i16 0, i32 3
+  ret <4 x i16> %vec2
+}
+
diff --git a/test/CodeGen/SystemZ/xor-01.ll b/test/CodeGen/SystemZ/xor-01.ll
index e0aaffbb257e..281f386ce955 100644
--- a/test/CodeGen/SystemZ/xor-01.ll
+++ b/test/CodeGen/SystemZ/xor-01.ll
@@ -1,6 +1,6 @@
 ; Test 32-bit XORs in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
 
 declare i32 @foo()